From dace41e3d4c3dfc3a52fa73fcba322082dcce22a Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Mon, 13 Mar 2017 23:55:19 -0300 Subject: [PATCH] Continue scrape with duplicated metrics Problems of a single collector, like duplicated metrics read via the textfile collector, should not fail the collection and export of other metrics. --- node_exporter.go | 10 +++- node_exporter_test.go | 132 +++++++++++++++++++++++++++--------------- 2 files changed, 93 insertions(+), 49 deletions(-) diff --git a/node_exporter.go b/node_exporter.go index 1a505cfd..1028ed8c 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -158,10 +158,16 @@ func main() { log.Infof(" - %s", n) } - prometheus.MustRegister(NodeCollector{collectors: collectors}) + if err := prometheus.Register(NodeCollector{collectors: collectors}); err != nil { + log.Fatalf("Couldn't register collector: %s", err) + } handler := promhttp.HandlerFor(prometheus.DefaultGatherer, - promhttp.HandlerOpts{ErrorLog: log.NewErrorLogger()}) + promhttp.HandlerOpts{ + ErrorLog: log.NewErrorLogger(), + ErrorHandling: promhttp.ContinueOnError, + }) + // TODO(ts): Remove deprecated and problematic InstrumentHandler usage. http.Handle(*metricsPath, prometheus.InstrumentHandler("prometheus", handler)) http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(` diff --git a/node_exporter_test.go b/node_exporter_test.go index 4ff0caa2..d1830ec2 100644 --- a/node_exporter_test.go +++ b/node_exporter_test.go @@ -2,21 +2,23 @@ package main import ( "fmt" + "io/ioutil" "net/http" "os" "os/exec" + "path/filepath" "testing" "time" "github.com/prometheus/procfs" ) -func TestFileDescriptorLeak(t *testing.T) { - const ( - binary = "./node_exporter" - address = "localhost:9100" - ) +const ( + binary = "./node_exporter" + address = "localhost:19100" +) +func TestFileDescriptorLeak(t *testing.T) { if _, err := os.Stat(binary); err != nil { t.Skipf("node_exporter binary not available, try to run `make build` first: %s", err) } @@ -24,75 +26,111 @@ func TestFileDescriptorLeak(t *testing.T) { t.Skipf("proc filesystem is not available, but currently required to read number of open file descriptors: %s", err) } - errc := make(chan error) exporter := exec.Command(binary, "-web.listen-address", address) - go func() { - if err := exporter.Run(); err != nil { - errc <- fmt.Errorf("execution of node_exporter failed: %s", err) - } else { - errc <- nil - } - }() - - select { - case err := <-errc: - t.Fatal(err) - case <-time.After(100 * time.Millisecond): - } - - go func(pid int, url string) { - if err := queryExporter(url); err != nil { - errc <- err - return + test := func(pid int) error { + if err := queryExporter(address); err != nil { + return err } proc, err := procfs.NewProc(pid) if err != nil { - errc <- err - return + return err } fdsBefore, err := proc.FileDescriptors() if err != nil { - errc <- err - return + return err } for i := 0; i < 5; i++ { - if err := queryExporter(url); err != nil { - errc <- err - return + if err := queryExporter(address); err != nil { + return err } } fdsAfter, err := proc.FileDescriptors() if err != nil { - errc <- err - return + return err } if want, have := len(fdsBefore), len(fdsAfter); want != have { - errc <- fmt.Errorf("want %d open file descriptors after metrics scrape, have %d", want, have) + return fmt.Errorf("want %d open file descriptors after metrics scrape, have %d", want, have) } - errc <- nil - }(exporter.Process.Pid, fmt.Sprintf("http://%s/metrics", address)) + return nil + } - select { - case err := <-errc: - if exporter.Process != nil { - exporter.Process.Kill() - } - if err != nil { - t.Fatal(err) - } + if err := runCommandAndTests(exporter, test); err != nil { + t.Error(err) } } -func queryExporter(url string) error { - resp, err := http.Get(url) +func TestHandlingOfDuplicatedMetrics(t *testing.T) { + if _, err := os.Stat(binary); err != nil { + t.Skipf("node_exporter binary not available, try to run `make build` first: %s", err) + } + + dir, err := ioutil.TempDir("", "node-exporter") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + content := []byte("dummy_metric 1\n") + if err := ioutil.WriteFile(filepath.Join(dir, "a.prom"), content, 0600); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(filepath.Join(dir, "b.prom"), content, 0600); err != nil { + t.Fatal(err) + } + + exporter := exec.Command(binary, "-web.listen-address", address, "-collector.textfile.directory", dir) + test := func(_ int) error { + return queryExporter(address) + } + + if err := runCommandAndTests(exporter, test); err != nil { + t.Error(err) + } +} + +func queryExporter(address string) error { + resp, err := http.Get(fmt.Sprintf("http://%s/metrics", address)) if err != nil { return err } if err := resp.Body.Close(); err != nil { return err } - if want, have := resp.StatusCode, http.StatusOK; want != have { + if want, have := http.StatusOK, resp.StatusCode; want != have { return fmt.Errorf("want /metrics status code %d, have %d", want, have) } return nil } + +func runCommandAndTests(cmd *exec.Cmd, fn func(pid int) error) error { + errc := make(chan error) + go func() { + if err := cmd.Run(); err != nil { + errc <- fmt.Errorf("execution of command failed: %s", err) + } else { + errc <- nil + } + }() + + // Allow the process to start before running any tests. + select { + case err := <-errc: + return err + case <-time.After(100 * time.Millisecond): + } + + go func(pid int) { + errc <- fn(pid) + }(cmd.Process.Pid) + + select { + case err := <-errc: + if cmd.Process != nil { + cmd.Process.Kill() + } + if err != nil { + return err + } + } + return nil +}