api: provide per metric metadata (#6420)

* api: provide per metric metadata

This adds a new endpoint that provides per metric metadata via the V1 API.

It collapses metadata that is equal across all targets, and aggregates under the same metric name the ones that differ.

* Allow tests to be asserted on response length

Some tests e.g. limit on API responses, don't require an assertion on
equality.

This allows us to assert against response length instead of
equality.

Signed-off-by: gotjosh <josue@grafana.com>
This commit is contained in:
gotjosh 2019-12-10 14:56:16 +00:00 committed by Brian Brazil
parent 466cc36ba0
commit 0a0a228db3
3 changed files with 346 additions and 41 deletions

View File

@ -569,7 +569,7 @@ $ curl http://localhost:9090/api/v1/alerts
## Querying target metadata
The following endpoint returns metadata about metrics currently scraped by targets.
The following endpoint returns metadata about metrics currently scraped from targets.
This is **experimental** and might change in the future.
```
@ -653,6 +653,52 @@ curl -G http://localhost:9091/api/v1/targets/metadata \
}
```
## Querying metric metadata
The following endpoint returns metadata about metrics currently scraped from targets. However, it does not provide any target information.
This is considered **experimental** and might change in the future.
```
GET /api/v1/metadata
```
URL query parameters:
- `limit=<number>`: Maximum number of metrics to return.
The `data` section of the query result consists of an object where each key is a metric name and each value is a list of unique metadata objects, as exposed for that metric name across all targets.
The following example returns two metrics. Note that the metric `http_requests_total` has more than one object in the list. At least one target has a value for `HELP` that does not match the rest.
```json
curl -G http://localhost:9090/api/v1/metadata?limit=2
{
"status": "success",
"data": {
"cortex_ring_tokens": [
{
"type": "gauge",
"help": "Number of tokens in the ring",
"unit": ""
}
],
"http_requests_total": [
{
"type": "counter",
"help": "Number of HTTP requests",
"unit": ""
},
{
"type": "counter",
"help": "Amount of HTTP requests",
"unit": ""
}
]
}
}
```
## Alertmanagers
The following endpoint returns an overview of the current state of the

View File

@ -276,6 +276,8 @@ func (api *API) Register(r *route.Router) {
r.Get("/targets/metadata", wrap(api.targetMetadata))
r.Get("/alertmanagers", wrap(api.alertmanagers))
r.Get("/metadata", wrap(api.metricMetadata))
r.Get("/status/config", wrap(api.serveConfig))
r.Get("/status/runtimeinfo", wrap(api.serveRuntimeInfo))
r.Get("/status/buildinfo", wrap(api.serveBuildInfo))
@ -688,7 +690,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult {
metric := r.FormValue("metric")
var res []metricMetadata
res := []metricMetadata{}
for _, tt := range api.targetRetriever.TargetsActive() {
for _, t := range tt {
if limit >= 0 && len(res) >= limit {
@ -722,9 +724,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult {
}
}
}
if len(res) == 0 {
return apiFuncResult{nil, &apiError{errorNotFound, errors.New("specified metadata not found")}, nil, nil}
}
return apiFuncResult{res, nil, nil, nil}
}
@ -805,6 +805,58 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert {
return apiAlerts
}
// metadata is the target-independent description of a metric as returned by
// the metric metadata endpoint. It contains only comparable fields, which lets
// metricMetadata use it as a map key to de-duplicate identical entries.
type metadata struct {
// Type is the metric type copied from the scraped metadata entry.
Type textparse.MetricType `json:"type"`
// Help is the help text copied from the scraped metadata entry.
Help string `json:"help"`
// Unit is the unit copied from the scraped metadata entry.
Unit string `json:"unit"`
}
// metricMetadata returns the metadata known for each metric name across all
// active targets, without any target information.
//
// Metadata entries that are identical (same type, help and unit) across
// targets are collapsed into one; differing entries for the same metric name
// are all listed under that name. The optional "limit" form value caps the
// number of metric names in the result; a value that is not an integer yields
// an errorBadData error.
func (api *API) metricMetadata(r *http.Request) apiFuncResult {
	// A negative limit means "no limit".
	limit := -1
	if raw := r.FormValue("limit"); raw != "" {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return apiFuncResult{nil, &apiError{errorBadData, errors.New("limit must be a number")}, nil, nil}
		}
		limit = parsed
	}

	// Gather a set of distinct metadata entries per metric name.
	unique := make(map[string]map[metadata]struct{})
	for _, group := range api.targetRetriever.TargetsActive() {
		for _, target := range group {
			for _, entry := range target.MetadataList() {
				if _, seen := unique[entry.Metric]; !seen {
					unique[entry.Metric] = make(map[metadata]struct{})
				}
				key := metadata{Type: entry.Type, Help: entry.Help, Unit: entry.Unit}
				unique[entry.Metric][key] = struct{}{}
			}
		}
	}

	// Flatten each set into a slice, stopping once the limit is reached.
	res := make(map[string][]metadata)
	for name, variants := range unique {
		if limit >= 0 && len(res) >= limit {
			break
		}

		list := make([]metadata, 0, len(variants))
		for md := range variants {
			list = append(list, md)
		}
		res[name] = list
	}

	return apiFuncResult{res, nil, nil, nil}
}
// RuleDiscovery has info for all rules
type RuleDiscovery struct {
RuleGroups []*RuleGroup `json:"groups"`

View File

@ -137,7 +137,7 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
return t.droppedTargets
}
func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
targets, ok := t.activeTargets[identifier]
if !ok {
@ -151,6 +151,14 @@ func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metad
return nil
}
// ResetMetadataStore replaces the metadata store of every active target with
// a fresh, empty testMetaStore.
func (t *testTargetRetriever) ResetMetadataStore() {
	for _, group := range t.activeTargets {
		for _, target := range group {
			target.SetMetadataStore(&testMetaStore{})
		}
	}
}
type testAlertmanagerRetriever struct{}
func (t testAlertmanagerRetriever) Alertmanagers() []*url.URL {
@ -302,7 +310,7 @@ func TestEndpoints(t *testing.T) {
rulesRetriever: algr,
}
testEndpoints(t, api, true)
testEndpoints(t, api, testTargetRetriever, true)
})
// Run all the API tests against a API that is wired to forward queries via
@ -366,7 +374,7 @@ func TestEndpoints(t *testing.T) {
rulesRetriever: algr,
}
testEndpoints(t, api, false)
testEndpoints(t, api, testTargetRetriever, false)
})
}
@ -449,31 +457,8 @@ func setupTestTargetRetriever(t *testing.T) *testTargetRetriever {
Active: false,
},
}
targetRetriever := newTestTargetRetriever(targets)
targetRetriever.setMetadataStoreForTargets("test", &testMetaStore{
Metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created.",
Unit: "",
},
},
})
targetRetriever.setMetadataStoreForTargets("blackbox", &testMetaStore{
Metadata: []scrape.MetricMetadata{
{
Metric: "prometheus_tsdb_storage_blocks_bytes",
Type: textparse.MetricTypeGauge,
Help: "The number of bytes that are currently used for local storage by all blocks.",
Unit: "",
},
},
})
return targetRetriever
return newTestTargetRetriever(targets)
}
func setupRemote(s storage.Storage) *httptest.Server {
@ -531,16 +516,23 @@ func setupRemote(s storage.Storage) *httptest.Server {
return httptest.NewServer(handler)
}
func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, testLabelAPI bool) {
start := time.Unix(0, 0)
type targetMetadata struct {
identifier string
metadata []scrape.MetricMetadata
}
type test struct {
endpoint apiFunc
params map[string]string
query url.Values
response interface{}
errType errorType
sorter func(interface{})
endpoint apiFunc
params map[string]string
query url.Values
response interface{}
responseLen int
errType errorType
sorter func(interface{})
metadata []targetMetadata
}
var tests = []test{
@ -959,6 +951,19 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
query: url.Values{
"metric": []string{"go_threads"},
},
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created.",
Unit: "",
},
},
},
},
response: []metricMetadata{
{
Target: labels.FromMap(map[string]string{
@ -976,6 +981,19 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
query: url.Values{
"match_target": []string{"{job=\"blackbox\"}"},
},
metadata: []targetMetadata{
{
identifier: "blackbox",
metadata: []scrape.MetricMetadata{
{
Metric: "prometheus_tsdb_storage_blocks_bytes",
Type: textparse.MetricTypeGauge,
Help: "The number of bytes that are currently used for local storage by all blocks.",
Unit: "",
},
},
},
},
response: []metricMetadata{
{
Target: labels.FromMap(map[string]string{
@ -991,6 +1009,30 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
// Without a target or metric.
{
endpoint: api.targetMetadata,
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created.",
Unit: "",
},
},
},
{
identifier: "blackbox",
metadata: []scrape.MetricMetadata{
{
Metric: "prometheus_tsdb_storage_blocks_bytes",
Type: textparse.MetricTypeGauge,
Help: "The number of bytes that are currently used for local storage by all blocks.",
Unit: "",
},
},
},
},
response: []metricMetadata{
{
Target: labels.FromMap(map[string]string{
@ -1024,7 +1066,7 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
query: url.Values{
"match_target": []string{"{job=\"non-existentblackbox\"}"},
},
errType: errorNotFound,
response: []metricMetadata{},
},
{
endpoint: api.alertmanagers,
@ -1041,6 +1083,148 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
},
},
},
// With metadata available.
{
endpoint: api.metricMetadata,
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "prometheus_engine_query_duration_seconds",
Type: textparse.MetricTypeSummary,
Help: "Query timings",
Unit: "",
},
{
Metric: "go_info",
Type: textparse.MetricTypeGauge,
Help: "Information about the Go environment.",
Unit: "",
},
},
},
},
response: map[string][]metadata{
"prometheus_engine_query_duration_seconds": {{textparse.MetricTypeSummary, "Query timings", ""}},
"go_info": {{textparse.MetricTypeGauge, "Information about the Go environment.", ""}},
},
},
// With duplicate metadata for a metric that comes from different targets.
{
endpoint: api.metricMetadata,
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created",
Unit: "",
},
},
},
{
identifier: "blackbox",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created",
Unit: "",
},
},
},
},
response: map[string][]metadata{
"go_threads": {{textparse.MetricTypeGauge, "Number of OS threads created", ""}},
},
},
// With non-duplicate metadata for the same metric from different targets.
{
endpoint: api.metricMetadata,
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created",
Unit: "",
},
},
},
{
identifier: "blackbox",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads that were created.",
Unit: "",
},
},
},
},
response: map[string][]metadata{
"go_threads": []metadata{
{textparse.MetricTypeGauge, "Number of OS threads created", ""},
{textparse.MetricTypeGauge, "Number of OS threads that were created.", ""},
},
},
sorter: func(m interface{}) {
v := m.(map[string][]metadata)["go_threads"]
sort.Slice(v, func(i, j int) bool {
return v[i].Help < v[j].Help
})
},
},
// With a limit for the number of metrics returned
{
endpoint: api.metricMetadata,
query: url.Values{
"limit": []string{"2"},
},
metadata: []targetMetadata{
{
identifier: "test",
metadata: []scrape.MetricMetadata{
{
Metric: "go_threads",
Type: textparse.MetricTypeGauge,
Help: "Number of OS threads created",
Unit: "",
},
{
Metric: "prometheus_engine_query_duration_seconds",
Type: textparse.MetricTypeSummary,
Help: "Query Timmings.",
Unit: "",
},
},
},
{
identifier: "blackbox",
metadata: []scrape.MetricMetadata{
{
Metric: "go_gc_duration_seconds",
Type: textparse.MetricTypeSummary,
Help: "A summary of the GC invocation durations.",
Unit: "",
},
},
},
},
responseLen: 2,
},
// With no available metadata
{
endpoint: api.metricMetadata,
response: map[string][]metadata{},
},
{
endpoint: api.serveConfig,
response: &prometheusConfig{
@ -1233,6 +1417,12 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
if err != nil {
t.Fatal(err)
}
tr.ResetMetadataStore()
for _, tm := range test.metadata {
tr.SetMetadataStoreForTargets(tm.identifier, &testMetaStore{Metadata: tm.metadata})
}
res := test.endpoint(req.WithContext(ctx))
assertAPIError(t, res.err, test.errType)
@ -1240,7 +1430,11 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
test.sorter(res.data)
}
assertAPIResponse(t, res.data, test.response)
if test.responseLen != 0 {
assertAPIResponseLength(t, res.data, test.responseLen)
} else {
assertAPIResponse(t, res.data, test.response)
}
}
}
}
@ -1284,6 +1478,19 @@ func assertAPIResponse(t *testing.T, got interface{}, exp interface{}) {
}
}
// assertAPIResponseLength fails the test unless got contains exactly expLen
// elements.
//
// got must be an array, channel, map, slice or string. Any other value,
// including a nil response, is reported as a test failure instead of
// triggering the panic that reflect.Value.Len raises for such kinds.
func assertAPIResponseLength(t *testing.T, got interface{}, expLen int) {
	t.Helper()

	v := reflect.ValueOf(got)
	// Guard the Len call: it panics for kinds without a length, and for the
	// zero Value produced by a nil interface.
	switch v.Kind() {
	case reflect.Array, reflect.Chan, reflect.Map, reflect.Slice, reflect.String:
	default:
		t.Fatalf(
			"Response has no length, expected length:\n%d\ngot value of type:\n%T",
			expLen,
			got,
		)
	}

	if gotLen := v.Len(); gotLen != expLen {
		t.Fatalf(
			"Response length does not match, expected:\n%d\ngot:\n%d",
			expLen,
			gotLen,
		)
	}
}
func TestSampledReadEndpoint(t *testing.T) {
suite, err := promql.NewTest(t, `
load 1m