From c93e54d240db26595c230150cc082d086feebdb2 Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Wed, 8 Feb 2017 12:58:40 +0100 Subject: [PATCH 1/7] Adds execution timer stats to the range query API consumers should be able to get insight into the query run times. The UI currently measures total roundtrip times. This PR allows for more fine grained metrics to be exposed. * adds new timer for total execution time (queue + eval) * expose new timer, queue timer, and eval timer in stats field of the range query response: ```json { "status": "success", "data": { "resultType": "matrix", "result": [], "stats": { "execQueueTimeNs": 4683, "execTotalTimeNs": 2086587, "totalEvalTimeNs": 2077851 } } } ``` * stats field is optional, only set when query parameter `stats` is not empty Try it via ```sh curl 'http://localhost:9090/api/v1/query_range?query=up&start=1486480279&end=1486483879&step=14000&stats=true' ``` Review feedback * moved query stats json generation to query_stats.go * use seconds for all query timers * expose all timers available * Changed ExecTotalTime string representation from Exec queue total time to Exec total time --- promql/engine.go | 2 ++ util/stats/query_stats.go | 41 +++++++++++++++++++++++++++++++++++++++ util/stats/timer.go | 5 +++++ web/api/v1/api.go | 9 +++++++++ 4 files changed, 57 insertions(+) diff --git a/promql/engine.go b/promql/engine.go index a2d6ae819..b9775d210 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -314,6 +314,8 @@ func (ng *Engine) exec(ctx context.Context, q *query) (Value, error) { ctx, cancel := context.WithTimeout(ctx, ng.options.Timeout) q.cancel = cancel + execTimer := q.stats.GetTimer(stats.ExecTotalTime).Start() + defer execTimer.Stop() queueTimer := q.stats.GetTimer(stats.ExecQueueTime).Start() if err := ng.gate.Start(ctx); err != nil { diff --git a/util/stats/query_stats.go b/util/stats/query_stats.go index 3d7ad0e83..8c75c6568 100644 --- a/util/stats/query_stats.go +++ b/util/stats/query_stats.go @@ -25,6 +25,7 @@ const ( InnerEvalTime ResultAppendTime ExecQueueTime + ExecTotalTime ) // Return a string representation of a QueryTiming identifier. @@ -42,7 +43,47 @@ func (s QueryTiming) String() string { return "Result append time" case ExecQueueTime: return "Exec queue wait time" + case ExecTotalTime: + return "Exec total time" default: return "Unknown query timing" } } + +// QueryStats with all query timers mapped to durations. +type QueryStats struct { + TotalEvalTime float64 `json:"totalEvalTime"` + ResultSortTime float64 `json:"resultSortTime"` + QueryPreparationTime float64 `json:"queryPreparationTime"` + InnerEvalTime float64 `json:"innerEvalTime"` + ResultAppendTime float64 `json:"resultAppendTime"` + ExecQueueTime float64 `json:"execQueueTime"` + ExecTotalTime float64 `json:"execTotalTime"` +} + +// MakeQueryStats makes a QueryStats struct with all QueryTimings found in the +// given TimerGroup. +func MakeQueryStats(tg *TimerGroup) *QueryStats { + var qs QueryStats + + for s, timer := range tg.timers { + switch s { + case TotalEvalTime: + qs.TotalEvalTime = timer.Duration() + case ResultSortTime: + qs.ResultSortTime = timer.Duration() + case QueryPreparationTime: + qs.QueryPreparationTime = timer.Duration() + case InnerEvalTime: + qs.InnerEvalTime = timer.Duration() + case ResultAppendTime: + qs.ResultAppendTime = timer.Duration() + case ExecQueueTime: + qs.ExecQueueTime = timer.Duration() + case ExecTotalTime: + qs.ExecTotalTime = timer.Duration() + } + } + + return &qs +} diff --git a/util/stats/timer.go b/util/stats/timer.go index 3d3ee7309..75f5868e6 100644 --- a/util/stats/timer.go +++ b/util/stats/timer.go @@ -45,6 +45,11 @@ func (t *Timer) ElapsedTime() time.Duration { return time.Since(t.start) } +// Duration returns the duration value of the timer in seconds. +func (t *Timer) Duration() float64 { + return t.duration.Seconds() +} + // Return a string representation of the Timer. func (t *Timer) String() string { return fmt.Sprintf("%s: %s", t.name, t.duration) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 122b40684..d1b058847 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/util/httputil" + "github.com/prometheus/prometheus/util/stats" ) type status string @@ -172,6 +173,7 @@ func (api *API) Register(r *route.Router) { type queryData struct { ResultType promql.ValueType `json:"resultType"` Result promql.Value `json:"result"` + Stats *stats.QueryStats `json:"stats,omitempty"` } func (api *API) options(r *http.Request) (interface{}, *apiError) { @@ -284,9 +286,16 @@ func (api *API) queryRange(r *http.Request) (interface{}, *apiError) { return nil, &apiError{errorExec, res.Err} } + // Optional stats field in response if parameter "stats" is not empty. + var qs *stats.QueryStats + if r.FormValue("stats") != "" { + qs = stats.MakeQueryStats(qry.Stats()) + } + return &queryData{ ResultType: res.Value.Type(), Result: res.Value, + Stats: qs, }, nil } From af75ce02c1f9ec6a241d48b69c85906bccccf82e Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Thu, 16 Nov 2017 16:30:48 +0100 Subject: [PATCH 2/7] Review feedback * renamed MakeQueryStats * added stats to query() as well * gofmt --- util/stats/query_stats.go | 4 ++-- web/api/v1/api.go | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/util/stats/query_stats.go b/util/stats/query_stats.go index 8c75c6568..a23be41b5 100644 --- a/util/stats/query_stats.go +++ b/util/stats/query_stats.go @@ -61,9 +61,9 @@ type QueryStats struct { ExecTotalTime float64 `json:"execTotalTime"` } -// MakeQueryStats makes a QueryStats struct with all QueryTimings found in the +// NewQueryStats makes a QueryStats struct with all QueryTimings found in the // given TimerGroup. -func MakeQueryStats(tg *TimerGroup) *QueryStats { +func NewQueryStats(tg *TimerGroup) *QueryStats { var qs QueryStats for s, timer := range tg.timers { diff --git a/web/api/v1/api.go b/web/api/v1/api.go index d1b058847..825e8db72 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -171,8 +171,8 @@ func (api *API) Register(r *route.Router) { } type queryData struct { - ResultType promql.ValueType `json:"resultType"` - Result promql.Value `json:"result"` + ResultType promql.ValueType `json:"resultType"` + Result promql.Value `json:"result"` Stats *stats.QueryStats `json:"stats,omitempty"` } @@ -221,9 +221,17 @@ func (api *API) query(r *http.Request) (interface{}, *apiError) { } return nil, &apiError{errorExec, res.Err} } + + // Optional stats field in response if parameter "stats" is not empty. + var qs *stats.QueryStats + if r.FormValue("stats") != "" { + qs = stats.NewQueryStats(qry.Stats()) + } + return &queryData{ ResultType: res.Value.Type(), Result: res.Value, + Stats: qs, }, nil } @@ -289,7 +297,7 @@ func (api *API) queryRange(r *http.Request) (interface{}, *apiError) { // Optional stats field in response if parameter "stats" is not empty. var qs *stats.QueryStats if r.FormValue("stats") != "" { - qs = stats.MakeQueryStats(qry.Stats()) + qs = stats.NewQueryStats(qry.Stats()) } return &queryData{ From 87c46ea6c3368a3d5fc94f4cdff88030dcff8225 Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Fri, 17 Nov 2017 17:46:51 +0100 Subject: [PATCH 3/7] Renamed TotalEvalTime to EvalTotalTime * TotalFoo suggested a comprehensive timing, but TotalEvalTime was part of the Exec timings, together with Queue timings * The other option was to rename ExecTotalTime to TotalExecTime, but there was already ExecQueueTime, suggesting Exec to be some sort of group --- promql/engine.go | 2 +- util/stats/query_stats.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/promql/engine.go b/promql/engine.go index b9775d210..4f37ced26 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -330,7 +330,7 @@ func (ng *Engine) exec(ctx context.Context, q *query) (Value, error) { const env = "query execution" - evalTimer := q.stats.GetTimer(stats.TotalEvalTime).Start() + evalTimer := q.stats.GetTimer(stats.EvalTotalTime).Start() defer evalTimer.Stop() // The base context might already be canceled on the first iteration (e.g. during shutdown). diff --git a/util/stats/query_stats.go b/util/stats/query_stats.go index a23be41b5..5e2a9ab13 100644 --- a/util/stats/query_stats.go +++ b/util/stats/query_stats.go @@ -19,7 +19,7 @@ type QueryTiming int // Query timings. const ( - TotalEvalTime QueryTiming = iota + EvalTotalTime QueryTiming = iota ResultSortTime QueryPreparationTime InnerEvalTime @@ -31,8 +31,8 @@ const ( // Return a string representation of a QueryTiming identifier. func (s QueryTiming) String() string { switch s { - case TotalEvalTime: - return "Total eval time" + case EvalTotalTime: + return "Eval total time" case ResultSortTime: return "Result sorting time" case QueryPreparationTime: @@ -52,7 +52,7 @@ func (s QueryTiming) String() string { // QueryStats with all query timers mapped to durations. type QueryStats struct { - TotalEvalTime float64 `json:"totalEvalTime"` + EvalTotalTime float64 `json:"evalTotalTime"` ResultSortTime float64 `json:"resultSortTime"` QueryPreparationTime float64 `json:"queryPreparationTime"` InnerEvalTime float64 `json:"innerEvalTime"` @@ -68,8 +68,8 @@ func NewQueryStats(tg *TimerGroup) *QueryStats { for s, timer := range tg.timers { switch s { - case TotalEvalTime: - qs.TotalEvalTime = timer.Duration() + case EvalTotalTime: + qs.EvalTotalTime = timer.Duration() case ResultSortTime: qs.ResultSortTime = timer.Duration() case QueryPreparationTime: From 0a3faee526202b50940c1fec0fa7facfedc90a88 Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Fri, 17 Nov 2017 22:30:59 +0100 Subject: [PATCH 4/7] Pushed query timings into timings field, added tests --- util/stats/query_stats.go | 26 +++++++++++------- util/stats/stats_test.go | 55 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 util/stats/stats_test.go diff --git a/util/stats/query_stats.go b/util/stats/query_stats.go index 5e2a9ab13..206131aa8 100644 --- a/util/stats/query_stats.go +++ b/util/stats/query_stats.go @@ -50,8 +50,8 @@ func (s QueryTiming) String() string { } } -// QueryStats with all query timers mapped to durations. -type QueryStats struct { +// QueryTimings with all query timers mapped to durations. +type queryTimings struct { EvalTotalTime float64 `json:"evalTotalTime"` ResultSortTime float64 `json:"resultSortTime"` QueryPreparationTime float64 `json:"queryPreparationTime"` @@ -61,29 +61,35 @@ type QueryStats struct { ExecTotalTime float64 `json:"execTotalTime"` } +// QueryStats currently only holding query timings +type QueryStats struct { + Timings queryTimings `json:"timings,omitempty"` +} + // NewQueryStats makes a QueryStats struct with all QueryTimings found in the // given TimerGroup. func NewQueryStats(tg *TimerGroup) *QueryStats { - var qs QueryStats + var qt queryTimings for s, timer := range tg.timers { switch s { case EvalTotalTime: - qs.EvalTotalTime = timer.Duration() + qt.EvalTotalTime = timer.Duration() case ResultSortTime: - qs.ResultSortTime = timer.Duration() + qt.ResultSortTime = timer.Duration() case QueryPreparationTime: - qs.QueryPreparationTime = timer.Duration() + qt.QueryPreparationTime = timer.Duration() case InnerEvalTime: - qs.InnerEvalTime = timer.Duration() + qt.InnerEvalTime = timer.Duration() case ResultAppendTime: - qs.ResultAppendTime = timer.Duration() + qt.ResultAppendTime = timer.Duration() case ExecQueueTime: - qs.ExecQueueTime = timer.Duration() + qt.ExecQueueTime = timer.Duration() case ExecTotalTime: - qs.ExecTotalTime = timer.Duration() + qt.ExecTotalTime = timer.Duration() } } + qs := QueryStats{Timings: qt} return &qs } diff --git a/util/stats/stats_test.go b/util/stats/stats_test.go new file mode 100644 index 000000000..b56932a34 --- /dev/null +++ b/util/stats/stats_test.go @@ -0,0 +1,55 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stats + +import ( + "encoding/json" + "regexp" + "testing" + "time" +) + +func TestTimerGroupNewTimer(t *testing.T) { + tg := NewTimerGroup() + timer := tg.GetTimer(ExecTotalTime) + if duration := timer.Duration(); duration != 0 { + t.Errorf("Expected duration of 0, but it was %f instead.", duration) + } + minimum := 2 * time.Millisecond + timer.Start() + time.Sleep(minimum) + timer.Stop() + if duration := timer.Duration(); duration == 0 { + t.Errorf("Expected duration greater than 0, but it was %f instead.", duration) + } + if elapsed := timer.ElapsedTime(); elapsed < minimum { + t.Errorf("Expected elapsed time to be greater than time slept, elapsed was %d, and time slept was %d.", elapsed.Nanoseconds(), minimum) + } +} + +func TestQueryStatsWithTimers(t *testing.T) { + tg := NewTimerGroup() + timer := tg.GetTimer(ExecTotalTime) + timer.Start() + time.Sleep(2 * time.Millisecond) + timer.Stop() + + var qs *QueryStats + qs = NewQueryStats(tg) + actual, _ := json.Marshal(qs) + match, _ := regexp.MatchString(`[,{]"execTotalTime":\d+\.\d+[,}]`, string(actual)) + if !match { + t.Errorf("Expected timings with one non-zero entry, but got %s.", actual) + } +} From 79caf5342e9e983abbbada5093480b6f93981fc5 Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Fri, 17 Nov 2017 22:33:24 +0100 Subject: [PATCH 5/7] Comment styles --- util/stats/query_stats.go | 4 ++-- util/stats/stats_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/util/stats/query_stats.go b/util/stats/query_stats.go index 206131aa8..9c0c42b32 100644 --- a/util/stats/query_stats.go +++ b/util/stats/query_stats.go @@ -50,7 +50,7 @@ func (s QueryTiming) String() string { } } -// QueryTimings with all query timers mapped to durations. +// queryTimings with all query timers mapped to durations. type queryTimings struct { EvalTotalTime float64 `json:"evalTotalTime"` ResultSortTime float64 `json:"resultSortTime"` @@ -61,7 +61,7 @@ type queryTimings struct { ExecTotalTime float64 `json:"execTotalTime"` } -// QueryStats currently only holding query timings +// QueryStats currently only holding query timings. type QueryStats struct { Timings queryTimings `json:"timings,omitempty"` } diff --git a/util/stats/stats_test.go b/util/stats/stats_test.go index b56932a34..e7ef2717b 100644 --- a/util/stats/stats_test.go +++ b/util/stats/stats_test.go @@ -1,4 +1,4 @@ -// Copyright 2015 The Prometheus Authors +// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at From 3c5d207a531bbe8305d5099f4128aae0e571b997 Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Sun, 19 Nov 2017 21:06:35 +0100 Subject: [PATCH 6/7] Changed test failure handler to Fatalf(), added comment --- util/stats/stats_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/util/stats/stats_test.go b/util/stats/stats_test.go index e7ef2717b..91df6fccf 100644 --- a/util/stats/stats_test.go +++ b/util/stats/stats_test.go @@ -24,17 +24,17 @@ func TestTimerGroupNewTimer(t *testing.T) { tg := NewTimerGroup() timer := tg.GetTimer(ExecTotalTime) if duration := timer.Duration(); duration != 0 { - t.Errorf("Expected duration of 0, but it was %f instead.", duration) + t.Fatalf("Expected duration of 0, but it was %f instead.", duration) } minimum := 2 * time.Millisecond timer.Start() time.Sleep(minimum) timer.Stop() if duration := timer.Duration(); duration == 0 { - t.Errorf("Expected duration greater than 0, but it was %f instead.", duration) + t.Fatalf("Expected duration greater than 0, but it was %f instead.", duration) } if elapsed := timer.ElapsedTime(); elapsed < minimum { - t.Errorf("Expected elapsed time to be greater than time slept, elapsed was %d, and time slept was %d.", elapsed.Nanoseconds(), minimum) + t.Fatalf("Expected elapsed time to be greater than time slept, elapsed was %d, and time slept was %d.", elapsed.Nanoseconds(), minimum) } } @@ -48,8 +48,9 @@ func TestQueryStatsWithTimers(t *testing.T) { var qs *QueryStats qs = NewQueryStats(tg) actual, _ := json.Marshal(qs) + // Timing value is one of multiple fields, unit is seconds (float). match, _ := regexp.MatchString(`[,{]"execTotalTime":\d+\.\d+[,}]`, string(actual)) if !match { - t.Errorf("Expected timings with one non-zero entry, but got %s.", actual) + t.Fatalf("Expected timings with one non-zero entry, but got %s.", actual) } } From 8e01e7e3881ad7632fcfae7ba5740b1381401acb Mon Sep 17 00:00:00 2001 From: David Kaltschmidt Date: Sun, 19 Nov 2017 21:15:24 +0100 Subject: [PATCH 7/7] Handle runtime errors as fatal test results --- util/stats/stats_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/util/stats/stats_test.go b/util/stats/stats_test.go index 91df6fccf..3f82885ce 100644 --- a/util/stats/stats_test.go +++ b/util/stats/stats_test.go @@ -47,9 +47,15 @@ func TestQueryStatsWithTimers(t *testing.T) { var qs *QueryStats qs = NewQueryStats(tg) - actual, _ := json.Marshal(qs) + actual, err := json.Marshal(qs) + if err != nil { + t.Fatalf("Unexpected error during serialization: %v", err) + } // Timing value is one of multiple fields, unit is seconds (float). - match, _ := regexp.MatchString(`[,{]"execTotalTime":\d+\.\d+[,}]`, string(actual)) + match, err := regexp.MatchString(`[,{]"execTotalTime":\d+\.\d+[,}]`, string(actual)) + if err != nil { + t.Fatalf("Unexpected error while matching string: %v", err) + } if !match { t.Fatalf("Expected timings with one non-zero entry, but got %s.", actual) }