prometheus/rules/ast/ast.go

735 lines
18 KiB
Go
Raw Normal View History

// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ast
import (
"errors"
2013-01-17 23:07:00 +00:00
"fmt"
"hash/fnv"
"math"
2013-01-17 23:07:00 +00:00
"sort"
"time"
"github.com/golang/glog"
clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
)
// ----------------------------------------------------------------------------
// Raw data value types.
type Vector clientmodel.Samples
// BUG(julius): Pointerize this.
type Matrix []metric.SampleSet
type groupedAggregation struct {
labels clientmodel.Metric
value clientmodel.SampleValue
groupCount int
}
// ----------------------------------------------------------------------------
// Enums.
// Rule language expression types.
type ExprType int
const (
SCALAR ExprType = iota
VECTOR
MATRIX
STRING
)
// Binary operator types.
type BinOpType int
const (
ADD BinOpType = iota
SUB
MUL
DIV
MOD
NE
EQ
GT
LT
GE
LE
AND
OR
)
// Aggregation types.
type AggrType int
const (
SUM AggrType = iota
AVG
MIN
MAX
COUNT
)
// ----------------------------------------------------------------------------
// Interfaces.
// All node interfaces include the Node interface.
type Nodes []Node
type Node interface {
Type() ExprType
Children() Nodes
NodeTreeToDotGraph() string
String() string
}
// All node types implement one of the following interfaces. The name of the
// interface represents the type returned to the parent node.
type ScalarNode interface {
Node
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue
}
type VectorNode interface {
Node
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector
}
type MatrixNode interface {
Node
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix
EvalBoundaries(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix
}
type StringNode interface {
Node
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string
}
// ----------------------------------------------------------------------------
// ScalarNode types.
type (
// A numeric literal.
ScalarLiteral struct {
value clientmodel.SampleValue
}
// A function of numeric return type.
ScalarFunctionCall struct {
function *Function
args Nodes
}
// An arithmetic expression of numeric type.
ScalarArithExpr struct {
opType BinOpType
lhs ScalarNode
rhs ScalarNode
}
)
// ----------------------------------------------------------------------------
// VectorNode types.
type (
// Vector literal, i.e. metric name plus labelset.
VectorLiteral struct {
labels clientmodel.LabelSet
// Fingerprints are populated from labels at query analysis time.
fingerprints clientmodel.Fingerprints
}
// A function of vector return type.
VectorFunctionCall struct {
function *Function
args Nodes
}
// A vector aggregation with vector return type.
VectorAggregation struct {
aggrType AggrType
groupBy clientmodel.LabelNames
vector VectorNode
}
// An arithmetic expression of vector type.
VectorArithExpr struct {
opType BinOpType
lhs VectorNode
rhs Node
}
)
// ----------------------------------------------------------------------------
// MatrixNode types.
type (
// Matrix literal, i.e. metric name plus labelset and timerange.
MatrixLiteral struct {
labels clientmodel.LabelSet
// Fingerprints are populated from labels at query analysis time.
fingerprints clientmodel.Fingerprints
2013-03-26 11:33:48 +00:00
interval time.Duration
}
)
// ----------------------------------------------------------------------------
// StringNode types.
type (
// String literal.
StringLiteral struct {
str string
}
// A function of string return type.
StringFunctionCall struct {
function *Function
args Nodes
}
)
// ----------------------------------------------------------------------------
// Implementations.
// Node.Type() methods.
func (node ScalarLiteral) Type() ExprType { return SCALAR }
func (node ScalarFunctionCall) Type() ExprType { return SCALAR }
func (node ScalarArithExpr) Type() ExprType { return SCALAR }
func (node VectorLiteral) Type() ExprType { return VECTOR }
func (node VectorFunctionCall) Type() ExprType { return VECTOR }
func (node VectorAggregation) Type() ExprType { return VECTOR }
func (node VectorArithExpr) Type() ExprType { return VECTOR }
func (node MatrixLiteral) Type() ExprType { return MATRIX }
func (node StringLiteral) Type() ExprType { return STRING }
func (node StringFunctionCall) Type() ExprType { return STRING }
// Node.Children() methods.
func (node ScalarLiteral) Children() Nodes { return Nodes{} }
func (node ScalarFunctionCall) Children() Nodes { return node.args }
func (node ScalarArithExpr) Children() Nodes { return Nodes{node.lhs, node.rhs} }
func (node VectorLiteral) Children() Nodes { return Nodes{} }
func (node VectorFunctionCall) Children() Nodes { return node.args }
func (node VectorAggregation) Children() Nodes { return Nodes{node.vector} }
func (node VectorArithExpr) Children() Nodes { return Nodes{node.lhs, node.rhs} }
func (node MatrixLiteral) Children() Nodes { return Nodes{} }
func (node StringLiteral) Children() Nodes { return Nodes{} }
func (node StringFunctionCall) Children() Nodes { return node.args }
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *ScalarLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue {
return node.value
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *ScalarArithExpr) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue {
lhs := node.lhs.Eval(timestamp, view)
rhs := node.rhs.Eval(timestamp, view)
return evalScalarBinop(node.opType, lhs, rhs)
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *ScalarFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue {
return node.function.callFn(timestamp, view, node.args).(clientmodel.SampleValue)
}
func (node *VectorAggregation) labelsToGroupingKey(labels clientmodel.Metric) uint64 {
summer := fnv.New64a()
for _, label := range node.groupBy {
fmt.Fprint(summer, labels[label])
}
return summer.Sum64()
}
func labelsToKey(labels clientmodel.Metric) uint64 {
pairs := metric.LabelPairs{}
2013-01-17 23:07:00 +00:00
for label, value := range labels {
pairs = append(pairs, &metric.LabelPair{
Name: label,
Value: value,
})
2013-01-17 23:07:00 +00:00
}
sort.Sort(pairs)
summer := fnv.New64a()
for _, pair := range pairs {
fmt.Fprint(summer, pair.Name, pair.Value)
}
return summer.Sum64()
2013-01-15 10:30:55 +00:00
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func EvalVectorInstant(node VectorNode, timestamp clientmodel.Timestamp, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (vector Vector, err error) {
viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats)
if err != nil {
return
}
vector = node.Eval(timestamp, viewAdapter)
return
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func EvalVectorRange(node VectorNode, start clientmodel.Timestamp, end clientmodel.Timestamp, interval time.Duration, storage *metric.TieredStorage, queryStats *stats.TimerGroup) (Matrix, error) {
// Explicitly initialize to an empty matrix since a nil Matrix encodes to
// null in JSON.
matrix := Matrix{}
viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start()
viewAdapter, err := viewAdapterForRangeQuery(node, start, end, interval, storage, queryStats)
viewTimer.Stop()
if err != nil {
return nil, err
}
2013-01-17 23:07:00 +00:00
// TODO implement watchdog timer for long-running queries.
evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start()
sampleSets := map[uint64]*metric.SampleSet{}
for t := start; t.Before(end); t = t.Add(interval) {
vector := node.Eval(t, viewAdapter)
2013-01-17 23:07:00 +00:00
for _, sample := range vector {
samplePair := &metric.SamplePair{
2013-01-17 23:07:00 +00:00
Value: sample.Value,
Timestamp: sample.Timestamp,
}
groupingKey := labelsToKey(sample.Metric)
if sampleSets[groupingKey] == nil {
sampleSets[groupingKey] = &metric.SampleSet{
2013-01-17 23:07:00 +00:00
Metric: sample.Metric,
Values: metric.Values{samplePair},
2013-01-17 23:07:00 +00:00
}
} else {
sampleSets[groupingKey].Values = append(sampleSets[groupingKey].Values, samplePair)
}
}
}
evalTimer.Stop()
2013-01-17 23:07:00 +00:00
appendTimer := queryStats.GetTimer(stats.ResultAppendTime).Start()
2013-01-17 23:07:00 +00:00
for _, sampleSet := range sampleSets {
matrix = append(matrix, *sampleSet)
2013-01-17 23:07:00 +00:00
}
appendTimer.Stop()
return matrix, nil
2013-01-15 10:30:55 +00:00
}
func labelIntersection(metric1, metric2 clientmodel.Metric) clientmodel.Metric {
intersection := clientmodel.Metric{}
for label, value := range metric1 {
if metric2[label] == value {
intersection[label] = value
}
}
return intersection
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *VectorAggregation) groupedAggregationsToVector(aggregations map[uint64]*groupedAggregation, timestamp clientmodel.Timestamp) Vector {
vector := Vector{}
for _, aggregation := range aggregations {
switch node.aggrType {
case AVG:
aggregation.value = aggregation.value / clientmodel.SampleValue(aggregation.groupCount)
case COUNT:
aggregation.value = clientmodel.SampleValue(aggregation.groupCount)
default:
// For other aggregations, we already have the right value.
}
sample := &clientmodel.Sample{
Metric: aggregation.labels,
Value: aggregation.value,
Timestamp: timestamp,
}
vector = append(vector, sample)
}
return vector
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *VectorAggregation) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector {
vector := node.vector.Eval(timestamp, view)
result := map[uint64]*groupedAggregation{}
for _, sample := range vector {
groupingKey := node.labelsToGroupingKey(sample.Metric)
if groupedResult, ok := result[groupingKey]; ok {
groupedResult.labels = labelIntersection(groupedResult.labels, sample.Metric)
switch node.aggrType {
case SUM:
groupedResult.value += sample.Value
case AVG:
groupedResult.value += sample.Value
groupedResult.groupCount++
case MAX:
if groupedResult.value < sample.Value {
groupedResult.value = sample.Value
}
case MIN:
if groupedResult.value > sample.Value {
groupedResult.value = sample.Value
}
case COUNT:
groupedResult.groupCount++
default:
panic("Unknown aggregation type")
}
} else {
result[groupingKey] = &groupedAggregation{
labels: sample.Metric,
value: sample.Value,
groupCount: 1,
}
}
}
return node.groupedAggregationsToVector(result, timestamp)
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *VectorLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector {
values, err := view.GetValueAtTime(node.fingerprints, timestamp)
if err != nil {
2013-08-12 16:22:48 +00:00
glog.Error("Unable to get vector values: ", err)
return Vector{}
}
return values
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *VectorFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector {
return node.function.callFn(timestamp, view, node.args).(Vector)
}
func evalScalarBinop(opType BinOpType,
lhs clientmodel.SampleValue,
rhs clientmodel.SampleValue) clientmodel.SampleValue {
switch opType {
case ADD:
return lhs + rhs
case SUB:
return lhs - rhs
case MUL:
return lhs * rhs
case DIV:
if rhs != 0 {
return lhs / rhs
} else {
return clientmodel.SampleValue(math.Inf(int(rhs)))
}
case MOD:
if rhs != 0 {
return clientmodel.SampleValue(int(lhs) % int(rhs))
} else {
return clientmodel.SampleValue(math.Inf(int(rhs)))
}
case EQ:
if lhs == rhs {
return 1
} else {
return 0
}
case NE:
if lhs != rhs {
return 1
} else {
return 0
}
case GT:
if lhs > rhs {
return 1
} else {
return 0
}
case LT:
if lhs < rhs {
return 1
} else {
return 0
}
case GE:
if lhs >= rhs {
return 1
} else {
return 0
}
case LE:
if lhs <= rhs {
return 1
} else {
return 0
}
}
panic("Not all enum values enumerated in switch")
}
func evalVectorBinop(opType BinOpType,
lhs clientmodel.SampleValue,
rhs clientmodel.SampleValue) (clientmodel.SampleValue, bool) {
switch opType {
case ADD:
return lhs + rhs, true
case SUB:
return lhs - rhs, true
case MUL:
return lhs * rhs, true
case DIV:
if rhs != 0 {
return lhs / rhs, true
} else {
return clientmodel.SampleValue(math.Inf(int(rhs))), true
}
case MOD:
if rhs != 0 {
return clientmodel.SampleValue(int(lhs) % int(rhs)), true
} else {
return clientmodel.SampleValue(math.Inf(int(rhs))), true
}
case EQ:
if lhs == rhs {
return lhs, true
} else {
return 0, false
}
case NE:
if lhs != rhs {
return lhs, true
} else {
return 0, false
}
case GT:
if lhs > rhs {
return lhs, true
} else {
return 0, false
}
case LT:
if lhs < rhs {
return lhs, true
} else {
return 0, false
}
case GE:
if lhs >= rhs {
return lhs, true
} else {
return 0, false
}
case LE:
if lhs <= rhs {
return lhs, true
} else {
return 0, false
}
case AND:
return lhs, true
case OR:
return lhs, true // TODO: implement OR
}
panic("Not all enum values enumerated in switch")
}
func labelsEqual(labels1, labels2 clientmodel.Metric) bool {
if len(labels1) != len(labels2) {
return false
}
for label, value := range labels1 {
if labels2[label] != value && label != clientmodel.MetricNameLabel {
return false
}
}
return true
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *VectorArithExpr) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector {
lhs := node.lhs.Eval(timestamp, view)
result := Vector{}
if node.rhs.Type() == SCALAR {
rhs := node.rhs.(ScalarNode).Eval(timestamp, view)
for _, lhsSample := range lhs {
value, keep := evalVectorBinop(node.opType, lhsSample.Value, rhs)
if keep {
lhsSample.Value = value
result = append(result, lhsSample)
}
}
return result
} else if node.rhs.Type() == VECTOR {
rhs := node.rhs.(VectorNode).Eval(timestamp, view)
for _, lhsSample := range lhs {
for _, rhsSample := range rhs {
if labelsEqual(lhsSample.Metric, rhsSample.Metric) {
value, keep := evalVectorBinop(node.opType, lhsSample.Value, rhsSample.Value)
if keep {
lhsSample.Value = value
result = append(result, lhsSample)
}
}
}
}
return result
}
panic("Invalid vector arithmetic expression operands")
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *MatrixLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix {
interval := &metric.Interval{
OldestInclusive: timestamp.Add(-node.interval),
NewestInclusive: timestamp,
}
values, err := view.GetRangeValues(node.fingerprints, interval)
if err != nil {
2013-08-12 16:22:48 +00:00
glog.Error("Unable to get values for vector interval: ", err)
return Matrix{}
}
return values
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *MatrixLiteral) EvalBoundaries(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix {
interval := &metric.Interval{
OldestInclusive: timestamp.Add(-node.interval),
NewestInclusive: timestamp,
}
values, err := view.GetBoundaryValues(node.fingerprints, interval)
if err != nil {
2013-08-12 16:22:48 +00:00
glog.Error("Unable to get boundary values for vector interval: ", err)
return Matrix{}
}
return values
}
2013-01-15 10:30:55 +00:00
func (matrix Matrix) Len() int {
2013-01-17 23:07:00 +00:00
return len(matrix)
2013-01-15 10:30:55 +00:00
}
func (matrix Matrix) Less(i, j int) bool {
return matrix[i].Metric.String() < matrix[j].Metric.String()
2013-01-15 10:30:55 +00:00
}
func (matrix Matrix) Swap(i, j int) {
2013-01-17 23:07:00 +00:00
matrix[i], matrix[j] = matrix[j], matrix[i]
2013-01-15 10:30:55 +00:00
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *StringLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string {
return node.str
}
Use custom timestamp type for sample timestamps and related code. So far we've been using Go's native time.Time for anything related to sample timestamps. Since the range of time.Time is much bigger than what we need, this has created two problems: - there could be time.Time values which were out of the range/precision of the time type that we persist to disk, therefore causing incorrectly ordered keys. One bug caused by this was: https://github.com/prometheus/prometheus/issues/367 It would be good to use a timestamp type that's more closely aligned with what the underlying storage supports. - sizeof(time.Time) is 192, while Prometheus should be ok with a single 64-bit Unix timestamp (possibly even a 32-bit one). Since we store samples in large numbers, this seriously affects memory usage. Furthermore, copying/working with the data will be faster if it's smaller. *MEMORY USAGE RESULTS* Initial memory usage comparisons for a running Prometheus with 1 timeseries and 100,000 samples show roughly a 13% decrease in total (VIRT) memory usage. In my tests, this advantage for some reason decreased a bit the more samples the timeseries had (to 5-7% for millions of samples). This I can't fully explain, but perhaps garbage collection issues were involved. *WHEN TO USE THE NEW TIMESTAMP TYPE* The new clientmodel.Timestamp type should be used whenever time calculations are either directly or indirectly related to sample timestamps. For example: - the timestamp of a sample itself - all kinds of watermarks - anything that may become or is compared to a sample timestamp (like the timestamp passed into Target.Scrape()). When to still use time.Time: - for measuring durations/times not related to sample timestamps, like duration telemetry exporting, timers that indicate how frequently to execute some action, etc. *NOTE ON OPERATOR OPTIMIZATION TESTS* We don't use operator optimization code anymore, but it still lives in the code as dead code. It still has tests, but I couldn't get all of them to pass with the new timestamp format. I commented out the failing cases for now, but we should probably remove the dead code soon. I just didn't want to do that in the same change as this. Change-Id: I821787414b0debe85c9fffaeb57abd453727af0f
2013-10-28 13:35:02 +00:00
func (node *StringFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string {
return node.function.callFn(timestamp, view, node.args).(string)
}
// ----------------------------------------------------------------------------
// Constructors.
func NewScalarLiteral(value clientmodel.SampleValue) *ScalarLiteral {
return &ScalarLiteral{
value: value,
}
}
func NewVectorLiteral(labels clientmodel.LabelSet) *VectorLiteral {
return &VectorLiteral{
labels: labels,
}
}
func NewVectorAggregation(aggrType AggrType, vector VectorNode, groupBy clientmodel.LabelNames) *VectorAggregation {
return &VectorAggregation{
aggrType: aggrType,
groupBy: groupBy,
vector: vector,
}
}
func NewFunctionCall(function *Function, args Nodes) (Node, error) {
if err := function.CheckArgTypes(args); err != nil {
return nil, err
}
switch function.returnType {
case SCALAR:
return &ScalarFunctionCall{
function: function,
args: args,
}, nil
case VECTOR:
return &VectorFunctionCall{
function: function,
args: args,
}, nil
case STRING:
return &StringFunctionCall{
function: function,
args: args,
}, nil
}
panic("Function with invalid return type")
}
func nodesHaveTypes(nodes Nodes, exprTypes []ExprType) bool {
for _, node := range nodes {
correctType := false
for _, exprType := range exprTypes {
if node.Type() == exprType {
correctType = true
}
}
if !correctType {
return false
}
}
return true
}
func NewArithExpr(opType BinOpType, lhs Node, rhs Node) (Node, error) {
if !nodesHaveTypes(Nodes{lhs, rhs}, []ExprType{SCALAR, VECTOR}) {
return nil, errors.New("Binary operands must be of vector or scalar type")
}
if lhs.Type() == SCALAR && rhs.Type() == VECTOR {
return nil, errors.New("Left side of vector binary operation must be of vector type")
}
if opType == AND || opType == OR {
if lhs.Type() == SCALAR || rhs.Type() == SCALAR {
return nil, errors.New("AND and OR operators may only be used between vectors")
}
}
if lhs.Type() == VECTOR || rhs.Type() == VECTOR {
return &VectorArithExpr{
opType: opType,
lhs: lhs.(VectorNode),
rhs: rhs,
}, nil
}
return &ScalarArithExpr{
opType: opType,
lhs: lhs.(ScalarNode),
rhs: rhs.(ScalarNode),
}, nil
}
func NewMatrixLiteral(vector *VectorLiteral, interval time.Duration) *MatrixLiteral {
return &MatrixLiteral{
labels: vector.labels,
interval: interval,
}
}
func NewStringLiteral(str string) *StringLiteral {
return &StringLiteral{
str: str,
}
}