From 0a0a228db380c3d68decd720016ba568af8b4b6d Mon Sep 17 00:00:00 2001 From: gotjosh Date: Tue, 10 Dec 2019 14:56:16 +0000 Subject: [PATCH] api: provide per metric metadata (#6420) * api: provide per metric metadata This adds a new endpoint that provides per metric metadata via the V1 API. It collapses metadata that is equal across all targets, and aggregates under the same metric name the ones that differ. * Allow tests to be asserted on response length Some tests e.g. limit on API responses, don't require an assertion on equality. This allows us to assert against response length instead of equality. Signed-off-by: gotjosh --- docs/querying/api.md | 48 ++++++- web/api/v1/api.go | 60 ++++++++- web/api/v1/api_test.go | 279 +++++++++++++++++++++++++++++++++++------ 3 files changed, 346 insertions(+), 41 deletions(-) diff --git a/docs/querying/api.md b/docs/querying/api.md index fe8b723a6..a2b1f19c7 100644 --- a/docs/querying/api.md +++ b/docs/querying/api.md @@ -569,7 +569,7 @@ $ curl http://localhost:9090/api/v1/alerts ## Querying target metadata -The following endpoint returns metadata about metrics currently scraped by targets. +The following endpoint returns metadata about metrics currently scraped from targets. This is **experimental** and might change in the future. ``` @@ -653,6 +653,52 @@ curl -G http://localhost:9091/api/v1/targets/metadata \ } ``` +## Querying metric metadata + +The following endpoint returns metadata about metrics currently scraped from targets. However, it does not provide any target information. +This is considered **experimental** and might change in the future. + +``` +GET /api/v1/metadata +``` + +URL query parameters: + +- `limit=<number>`: Maximum number of metrics to return. + +The `data` section of the query result consists of an object where each key is a metric name and each value is a list of unique metadata objects, as exposed for that metric name across all targets. + +The following example returns two metrics. 
Note that the metric `http_requests_total` has more than one object in the list. At least one target has a value for `HELP` that does not match the rest. + +```json +curl -G http://localhost:9090/api/v1/metadata?limit=2 + +{ + "status": "success", + "data": { + "cortex_ring_tokens": [ + { + "type": "gauge", + "help": "Number of tokens in the ring", + "unit": "" + } + ], + "http_requests_total": [ + { + "type": "counter", + "help": "Number of HTTP requests", + "unit": "" + }, + { + "type": "counter", + "help": "Amount of HTTP requests", + "unit": "" + } + ] + } +} +``` + ## Alertmanagers The following endpoint returns an overview of the current state of the diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 8d7e75440..10e3af6c4 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -276,6 +276,8 @@ func (api *API) Register(r *route.Router) { r.Get("/targets/metadata", wrap(api.targetMetadata)) r.Get("/alertmanagers", wrap(api.alertmanagers)) + r.Get("/metadata", wrap(api.metricMetadata)) + r.Get("/status/config", wrap(api.serveConfig)) r.Get("/status/runtimeinfo", wrap(api.serveRuntimeInfo)) r.Get("/status/buildinfo", wrap(api.serveBuildInfo)) @@ -688,7 +690,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult { metric := r.FormValue("metric") - var res []metricMetadata + res := []metricMetadata{} for _, tt := range api.targetRetriever.TargetsActive() { for _, t := range tt { if limit >= 0 && len(res) >= limit { @@ -722,9 +724,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult { } } } - if len(res) == 0 { - return apiFuncResult{nil, &apiError{errorNotFound, errors.New("specified metadata not found")}, nil, nil} - } + return apiFuncResult{res, nil, nil, nil} } @@ -805,6 +805,58 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert { return apiAlerts } +type metadata struct { + Type textparse.MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + +func (api *API) metricMetadata(r 
*http.Request) apiFuncResult { + metrics := map[string]map[metadata]struct{}{} + + limit := -1 + if s := r.FormValue("limit"); s != "" { + var err error + if limit, err = strconv.Atoi(s); err != nil { + return apiFuncResult{nil, &apiError{errorBadData, errors.New("limit must be a number")}, nil, nil} + } + } + + for _, tt := range api.targetRetriever.TargetsActive() { + for _, t := range tt { + for _, mm := range t.MetadataList() { + m := metadata{Type: mm.Type, Help: mm.Help, Unit: mm.Unit} + ms, ok := metrics[mm.Metric] + + if !ok { + ms = map[metadata]struct{}{} + metrics[mm.Metric] = ms + } + + ms[m] = struct{}{} + } + } + } + + res := map[string][]metadata{} + + for name, set := range metrics { + if limit >= 0 && len(res) >= limit { + break + } + + s := []metadata{} + + for metadata := range set { + s = append(s, metadata) + } + + res[name] = s + } + + return apiFuncResult{res, nil, nil, nil} +} + // RuleDiscovery has info for all rules type RuleDiscovery struct { RuleGroups []*RuleGroup `json:"groups"` diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index eafe52f54..c7987c8d7 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -137,7 +137,7 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target { return t.droppedTargets } -func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { +func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error { targets, ok := t.activeTargets[identifier] if !ok { @@ -151,6 +151,14 @@ func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metad return nil } +func (t *testTargetRetriever) ResetMetadataStore() { + for _, at := range t.activeTargets { + for _, tt := range at { + tt.SetMetadataStore(&testMetaStore{}) + } + } +} + type testAlertmanagerRetriever struct{} func (t testAlertmanagerRetriever) Alertmanagers() []*url.URL { @@ -302,7 
+310,7 @@ func TestEndpoints(t *testing.T) { rulesRetriever: algr, } - testEndpoints(t, api, true) + testEndpoints(t, api, testTargetRetriever, true) }) // Run all the API tests against a API that is wired to forward queries via @@ -366,7 +374,7 @@ func TestEndpoints(t *testing.T) { rulesRetriever: algr, } - testEndpoints(t, api, false) + testEndpoints(t, api, testTargetRetriever, false) }) } @@ -449,31 +457,8 @@ func setupTestTargetRetriever(t *testing.T) *testTargetRetriever { Active: false, }, } - targetRetriever := newTestTargetRetriever(targets) - targetRetriever.setMetadataStoreForTargets("test", &testMetaStore{ - Metadata: []scrape.MetricMetadata{ - { - Metric: "go_threads", - Type: textparse.MetricTypeGauge, - Help: "Number of OS threads created.", - Unit: "", - }, - }, - }) - - targetRetriever.setMetadataStoreForTargets("blackbox", &testMetaStore{ - Metadata: []scrape.MetricMetadata{ - { - Metric: "prometheus_tsdb_storage_blocks_bytes", - Type: textparse.MetricTypeGauge, - Help: "The number of bytes that are currently used for local storage by all blocks.", - Unit: "", - }, - }, - }) - - return targetRetriever + return newTestTargetRetriever(targets) } func setupRemote(s storage.Storage) *httptest.Server { @@ -531,16 +516,23 @@ func setupRemote(s storage.Storage) *httptest.Server { return httptest.NewServer(handler) } -func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { +func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, testLabelAPI bool) { start := time.Unix(0, 0) + type targetMetadata struct { + identifier string + metadata []scrape.MetricMetadata + } + type test struct { - endpoint apiFunc - params map[string]string - query url.Values - response interface{} - errType errorType - sorter func(interface{}) + endpoint apiFunc + params map[string]string + query url.Values + response interface{} + responseLen int + errType errorType + sorter func(interface{}) + metadata []targetMetadata } var tests = []test{ @@ -959,6 +951,19 @@ 
func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { query: url.Values{ "metric": []string{"go_threads"}, }, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created.", + Unit: "", + }, + }, + }, + }, response: []metricMetadata{ { Target: labels.FromMap(map[string]string{ @@ -976,6 +981,19 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { query: url.Values{ "match_target": []string{"{job=\"blackbox\"}"}, }, + metadata: []targetMetadata{ + { + identifier: "blackbox", + metadata: []scrape.MetricMetadata{ + { + Metric: "prometheus_tsdb_storage_blocks_bytes", + Type: textparse.MetricTypeGauge, + Help: "The number of bytes that are currently used for local storage by all blocks.", + Unit: "", + }, + }, + }, + }, response: []metricMetadata{ { Target: labels.FromMap(map[string]string{ @@ -991,6 +1009,30 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { // Without a target or metric. 
{ endpoint: api.targetMetadata, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created.", + Unit: "", + }, + }, + }, + { + identifier: "blackbox", + metadata: []scrape.MetricMetadata{ + { + Metric: "prometheus_tsdb_storage_blocks_bytes", + Type: textparse.MetricTypeGauge, + Help: "The number of bytes that are currently used for local storage by all blocks.", + Unit: "", + }, + }, + }, + }, response: []metricMetadata{ { Target: labels.FromMap(map[string]string{ @@ -1024,7 +1066,7 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { query: url.Values{ "match_target": []string{"{job=\"non-existentblackbox\"}"}, }, - errType: errorNotFound, + response: []metricMetadata{}, }, { endpoint: api.alertmanagers, @@ -1041,6 +1083,148 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { }, }, }, + // With metadata available. + { + endpoint: api.metricMetadata, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "prometheus_engine_query_duration_seconds", + Type: textparse.MetricTypeSummary, + Help: "Query timings", + Unit: "", + }, + { + Metric: "go_info", + Type: textparse.MetricTypeGauge, + Help: "Information about the Go environment.", + Unit: "", + }, + }, + }, + }, + response: map[string][]metadata{ + "prometheus_engine_query_duration_seconds": {{textparse.MetricTypeSummary, "Query timings", ""}}, + "go_info": {{textparse.MetricTypeGauge, "Information about the Go environment.", ""}}, + }, + }, + // With duplicate metadata for a metric that comes from different targets. 
+ { + endpoint: api.metricMetadata, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created", + Unit: "", + }, + }, + }, + { + identifier: "blackbox", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created", + Unit: "", + }, + }, + }, + }, + response: map[string][]metadata{ + "go_threads": {{textparse.MetricTypeGauge, "Number of OS threads created", ""}}, + }, + }, + // With non-duplicate metadata for the same metric from different targets. + { + endpoint: api.metricMetadata, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created", + Unit: "", + }, + }, + }, + { + identifier: "blackbox", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads that were created.", + Unit: "", + }, + }, + }, + }, + response: map[string][]metadata{ + "go_threads": []metadata{ + {textparse.MetricTypeGauge, "Number of OS threads created", ""}, + {textparse.MetricTypeGauge, "Number of OS threads that were created.", ""}, + }, + }, + sorter: func(m interface{}) { + v := m.(map[string][]metadata)["go_threads"] + + sort.Slice(v, func(i, j int) bool { + return v[i].Help < v[j].Help + }) + }, + }, + // With a limit for the number of metrics returned + { + endpoint: api.metricMetadata, + query: url.Values{ + "limit": []string{"2"}, + }, + metadata: []targetMetadata{ + { + identifier: "test", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_threads", + Type: textparse.MetricTypeGauge, + Help: "Number of OS threads created", + Unit: "", + }, + { + Metric: "prometheus_engine_query_duration_seconds", + Type: textparse.MetricTypeSummary, + Help: "Query Timmings.", + 
Unit: "", + }, + }, + }, + { + identifier: "blackbox", + metadata: []scrape.MetricMetadata{ + { + Metric: "go_gc_duration_seconds", + Type: textparse.MetricTypeSummary, + Help: "A summary of the GC invocation durations.", + Unit: "", + }, + }, + }, + }, + responseLen: 2, + }, + // With no available metadata + { + endpoint: api.metricMetadata, + response: map[string][]metadata{}, + }, { endpoint: api.serveConfig, response: &prometheusConfig{ @@ -1233,6 +1417,12 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { if err != nil { t.Fatal(err) } + + tr.ResetMetadataStore() + for _, tm := range test.metadata { + tr.SetMetadataStoreForTargets(tm.identifier, &testMetaStore{Metadata: tm.metadata}) + } + res := test.endpoint(req.WithContext(ctx)) assertAPIError(t, res.err, test.errType) @@ -1240,7 +1430,11 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) { test.sorter(res.data) } - assertAPIResponse(t, res.data, test.response) + if test.responseLen != 0 { + assertAPIResponseLength(t, res.data, test.responseLen) + } else { + assertAPIResponse(t, res.data, test.response) + } } } } @@ -1284,6 +1478,19 @@ func assertAPIResponse(t *testing.T, got interface{}, exp interface{}) { } } +func assertAPIResponseLength(t *testing.T, got interface{}, expLen int) { + t.Helper() + + gotLen := reflect.ValueOf(got).Len() + if gotLen != expLen { + t.Fatalf( + "Response length does not match, expected:\n%d\ngot:\n%d", + expLen, + gotLen, + ) + } +} + func TestSampledReadEndpoint(t *testing.T) { suite, err := promql.NewTest(t, ` load 1m