From e56786b2218f39893f0471bbcb444c4249e6c781 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Fri, 26 Dec 2014 12:37:30 +0000 Subject: [PATCH] Have scrape time as a pseudovariable, not a prometheus variable. This ensures it has the right timestamp, and is easier to work with. Switch sd variable away from 'outcome', using total/failed instead. --- retrieval/target.go | 55 +++++++++++++++--------------------- retrieval/target_provider.go | 20 +++++++------ retrieval/target_test.go | 33 +++++++++++++++++----- retrieval/targetmanager.go | 1 + retrieval/targetpool_test.go | 12 ++++---- storage/local/persistence.go | 2 +- storage/local/storage.go | 6 ++-- 7 files changed, 71 insertions(+), 58 deletions(-) diff --git a/retrieval/target.go b/retrieval/target.go index f8969b51d..66b378318 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -36,30 +36,19 @@ const ( InstanceLabel clientmodel.LabelName = "instance" // ScrapeHealthMetricName is the metric name for the synthetic health // variable. - ScrapeHealthMetricName clientmodel.LabelValue = "up" + scrapeHealthMetricName clientmodel.LabelValue = "up" + // scrapeDurationMetricName is the metric name for the synthetic scrape duration + // variable. + scrapeDurationMetricName clientmodel.LabelValue = "scrape_duration_seconds" // Constants for instrumentation. 
namespace = "prometheus" - job = "target_job" - instance = "target_instance" - failure = "failure" - outcome = "outcome" - success = "success" interval = "interval" ) var ( localhostRepresentations = []string{"http://127.0.0.1", "http://localhost"} - targetOperationLatencies = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "target_operation_latency_milliseconds", - Help: "The latencies for target operations.", - Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99}, - }, - []string{job, instance, outcome}, - ) targetIntervalLength = prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: namespace, @@ -72,7 +61,6 @@ var ( ) func init() { - prometheus.MustRegister(targetOperationLatencies) prometheus.MustRegister(targetIntervalLength) } @@ -189,28 +177,37 @@ func NewTarget(url string, deadline time.Duration, baseLabels clientmodel.LabelS return target } -func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool) { - metric := clientmodel.Metric{} +func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool, scrapeDuration time.Duration) { + healthMetric := clientmodel.Metric{} + durationMetric := clientmodel.Metric{} for label, value := range t.baseLabels { - metric[label] = value + healthMetric[label] = value + durationMetric[label] = value } - metric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(ScrapeHealthMetricName) - metric[InstanceLabel] = clientmodel.LabelValue(t.URL()) + healthMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeHealthMetricName) + durationMetric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(scrapeDurationMetricName) + healthMetric[InstanceLabel] = clientmodel.LabelValue(t.URL()) + durationMetric[InstanceLabel] = clientmodel.LabelValue(t.URL()) healthValue := clientmodel.SampleValue(0) if healthy { healthValue = clientmodel.SampleValue(1) } - sample := 
&clientmodel.Sample{ - Metric: metric, + healthSample := &clientmodel.Sample{ + Metric: healthMetric, Timestamp: timestamp, Value: healthValue, } + durationSample := &clientmodel.Sample{ + Metric: durationMetric, + Timestamp: timestamp, + Value: clientmodel.SampleValue(float64(scrapeDuration) / float64(time.Second)), + } ingester.Ingest(&extraction.Result{ Err: nil, - Samples: clientmodel.Samples{sample}, + Samples: clientmodel.Samples{healthSample, durationSample}, }) } @@ -292,23 +289,15 @@ const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client func (t *target) scrape(ingester extraction.Ingester) (err error) { timestamp := clientmodel.Now() defer func(start time.Time) { - ms := float64(time.Since(start)) / float64(time.Millisecond) - labels := prometheus.Labels{ - job: string(t.baseLabels[clientmodel.JobLabel]), - instance: t.URL(), - outcome: success, - } t.Lock() // Writing t.state and t.lastError requires the lock. if err == nil { t.state = Alive - labels[outcome] = failure } else { t.state = Unreachable } t.lastError = err t.Unlock() - targetOperationLatencies.With(labels).Observe(ms) - t.recordScrapeHealth(ingester, timestamp, err == nil) + t.recordScrapeHealth(ingester, timestamp, err == nil, time.Since(start)) }(time.Now()) req, err := http.NewRequest("GET", t.URL(), nil) diff --git a/retrieval/target_provider.go b/retrieval/target_provider.go index 2313a9985..a5e2af7b9 100644 --- a/retrieval/target_provider.go +++ b/retrieval/target_provider.go @@ -33,17 +33,22 @@ import ( const resolvConf = "/etc/resolv.conf" var ( - dnsSDLookupsCount = prometheus.NewCounterVec( + dnsSDLookupsCount = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: namespace, Name: "dns_sd_lookups_total", - Help: "The number of DNS-SD lookup successes/failures per pool.", - }, - []string{outcome}, - ) + Help: "The number of DNS-SD lookups.", + }) + dnsSDLookupFailuresCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + 
Name: "dns_sd_lookup_failures_total", + Help: "The number of DNS-SD lookup failures.", + }) ) func init() { + prometheus.MustRegister(dnsSDLookupFailuresCount) prometheus.MustRegister(dnsSDLookupsCount) } @@ -77,11 +82,10 @@ func NewSdTargetProvider(job config.JobConfig) *sdTargetProvider { func (p *sdTargetProvider) Targets() ([]Target, error) { var err error defer func() { - message := success + dnsSDLookupsCount.Inc() if err != nil { - message = failure + dnsSDLookupFailuresCount.Inc() } - dnsSDLookupsCount.WithLabelValues(message).Inc() }() if time.Since(p.lastRefresh) < p.refreshInterval { diff --git a/retrieval/target_test.go b/retrieval/target_test.go index 4efe413de..73492ce93 100644 --- a/retrieval/target_test.go +++ b/retrieval/target_test.go @@ -39,7 +39,7 @@ func (i *collectResultIngester) Ingest(r *extraction.Result) error { func TestTargetScrapeUpdatesState(t *testing.T) { testTarget := target{ state: Unknown, - url: "bad schema", + url: "bad schema", httpClient: utility.NewDeadlineClient(0), } testTarget.scrape(nopIngester{}) @@ -50,25 +50,25 @@ func TestTargetScrapeUpdatesState(t *testing.T) { func TestTargetRecordScrapeHealth(t *testing.T) { testTarget := target{ - url: "http://example.url", + url: "http://example.url", baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"}, httpClient: utility.NewDeadlineClient(0), } now := clientmodel.Now() ingester := &collectResultIngester{} - testTarget.recordScrapeHealth(ingester, now, true) + testTarget.recordScrapeHealth(ingester, now, true, 2 * time.Second) result := ingester.result - if len(result.Samples) != 1 { - t.Fatalf("Expected one sample, got %d", len(result.Samples)) + if len(result.Samples) != 2 { + t.Fatalf("Expected two samples, got %d", len(result.Samples)) } actual := result.Samples[0] expected := &clientmodel.Sample{ Metric: clientmodel.Metric{ - clientmodel.MetricNameLabel: ScrapeHealthMetricName, + clientmodel.MetricNameLabel: scrapeHealthMetricName, InstanceLabel: 
"http://example.url", clientmodel.JobLabel: "testjob", }, @@ -83,6 +83,25 @@ func TestTargetRecordScrapeHealth(t *testing.T) { if !actual.Equal(expected) { t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual) } + + actual = result.Samples[1] + expected = &clientmodel.Sample{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: scrapeDurationMetricName, + InstanceLabel: "http://example.url", + clientmodel.JobLabel: "testjob", + }, + Timestamp: now, + Value: 2.0, + } + + if result.Err != nil { + t.Fatalf("Got unexpected error: %v", result.Err) + } + + if !actual.Equal(expected) { + t.Fatalf("Expected and actual samples not equal. Expected: %v, actual: %v", expected, actual) + } } func TestTargetScrapeTimeout(t *testing.T) { @@ -147,7 +166,7 @@ func TestTargetScrape404(t *testing.T) { func TestTargetRunScraperScrapes(t *testing.T) { testTarget := target{ state: Unknown, - url: "bad schema", + url: "bad schema", httpClient: utility.NewDeadlineClient(0), scraperStopping: make(chan struct{}), scraperStopped: make(chan struct{}), diff --git a/retrieval/targetmanager.go b/retrieval/targetmanager.go index 01ba3a68d..4289215c6 100644 --- a/retrieval/targetmanager.go +++ b/retrieval/targetmanager.go @@ -15,6 +15,7 @@ package retrieval import ( "sync" + "github.com/golang/glog" "github.com/prometheus/client_golang/extraction" diff --git a/retrieval/targetpool_test.go b/retrieval/targetpool_test.go index 8fd75353d..ca6217a1b 100644 --- a/retrieval/targetpool_test.go +++ b/retrieval/targetpool_test.go @@ -27,7 +27,7 @@ func testTargetPool(t testing.TB) { } type input struct { - url string + url string scheduledFor time.Time } @@ -84,7 +84,7 @@ func testTargetPool(t testing.TB) { for _, input := range scenario.inputs { target := target{ - url: input.url, + url: input.url, newBaseLabels: make(chan clientmodel.LabelSet, 1), httpClient: &http.Client{}, } @@ -114,7 +114,7 @@ func TestTargetPool(t *testing.T) { func 
TestTargetPoolReplaceTargets(t *testing.T) { pool := NewTargetPool(nil, nil, nopIngester{}, time.Duration(1)) oldTarget1 := &target{ - url: "example1", + url: "example1", state: Unreachable, scraperStopping: make(chan struct{}), scraperStopped: make(chan struct{}), @@ -122,7 +122,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) { httpClient: &http.Client{}, } oldTarget2 := &target{ - url: "example2", + url: "example2", state: Unreachable, scraperStopping: make(chan struct{}), scraperStopped: make(chan struct{}), @@ -130,7 +130,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) { httpClient: &http.Client{}, } newTarget1 := &target{ - url: "example1", + url: "example1", state: Alive, scraperStopping: make(chan struct{}), scraperStopped: make(chan struct{}), @@ -138,7 +138,7 @@ func TestTargetPoolReplaceTargets(t *testing.T) { httpClient: &http.Client{}, } newTarget2 := &target{ - url: "example3", + url: "example3", state: Alive, scraperStopping: make(chan struct{}), scraperStopped: make(chan struct{}), diff --git a/storage/local/persistence.go b/storage/local/persistence.go index 36674ef46..73d3f00f3 100644 --- a/storage/local/persistence.go +++ b/storage/local/persistence.go @@ -869,7 +869,7 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap iter := fingerprintToSeries.iter() defer func() { // Consume the iterator in any case to not leak goroutines. - for _ = range iter { + for range iter { } }() diff --git a/storage/local/storage.go b/storage/local/storage.go index 0618ba57d..7224c615f 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -567,7 +567,7 @@ func (s *memorySeriesStorage) cycleThroughMemoryFingerprints() chan clientmodel. defer func() { if fpIter != nil { - for _ = range fpIter { + for range fpIter { // Consume the iterator. } } @@ -661,9 +661,9 @@ loop: } } // Wait until both channels are closed. 
- for _ = range memoryFingerprints { + for range memoryFingerprints { } - for _ = range archivedFingerprints { + for range archivedFingerprints { } }