From 5bb7f00d00ba2d73488630851b352974511c233a Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 12 May 2020 14:03:15 -0700 Subject: [PATCH] change labelset comparison in promql engine to avoid false positive during detection of duplicates (#7058) * Use go1.14 new hash/maphash to hash both RHS and LHS instead of XOR'ing which has been resulting in hash collisions. Signed-off-by: Callum Styan * Refactor engine labelset signature generation, just use labels.Labels instead of hashes. Signed-off-by: Callum Styan * Address review comments; function comments + store result of lhs.String+rhs.String as key. Signed-off-by: Callum Styan * Replace all signatureFunc usage with signatureFuncString. Signed-off-by: Callum Styan * Make optimizations to labels String function and generation of rhs+lhs as string in resultMetric. Signed-off-by: Callum Styan * Use separate string functions that don't use strconv just for engine maps. Signed-off-by: Callum Styan * Use a byte invalid separator instead of quoting and have a buffer attached to EvalNodeHelper instead of using a global pool in the labels package. Signed-off-by: Callum Styan * Address review comments. Signed-off-by: Callum Styan * Address more review comments, labels has a function that now builds a byte slice without turning it into a string. Signed-off-by: Callum Styan * Use two different non-ascii hex codes as byte separators between labels and between sets of labels when building bytes of a Labels struct. Signed-off-by: Callum Styan * We only need the 2nd byte invalid sep. at the beginning of a labels.Bytes Signed-off-by: Callum Styan --- pkg/labels/labels.go | 60 +++++++++++++++++++-- promql/engine.go | 96 ++++++++++++++++++---------------- promql/functions.go | 10 ++-- promql/testdata/collision.test | 11 ++++ 4 files changed, 124 insertions(+), 53 deletions(-) create mode 100644 promql/testdata/collision.test diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index 138691e5a..5278cab79 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -22,14 +22,15 @@ import ( "github.com/cespare/xxhash" ) -const sep = '\xff' - // Well-known label names used by Prometheus components. const ( MetricName = "__name__" AlertName = "alertname" BucketLabel = "le" InstanceName = "instance" + + sep = '\xff' + labelSep = '\xfe' ) // Label is a key/value pair of strings. @@ -59,10 +60,25 @@ func (ls Labels) String() string { b.WriteString(strconv.Quote(l.Value)) } b.WriteByte('}') - return b.String() } +// Bytes returns ls as a byte slice. +// It uses an byte invalid character as a separator and so should not be used for printing. +func (ls Labels) Bytes(buf []byte) []byte { + b := bytes.NewBuffer(buf[:0]) + b.WriteByte(labelSep) + for i, l := range ls { + if i > 0 { + b.WriteByte(sep) + } + b.WriteString(l.Name) + b.WriteByte(sep) + b.WriteString(l.Value) + } + return b.Bytes() +} + // MarshalJSON implements json.Marshaler. func (ls Labels) MarshalJSON() ([]byte, error) { return json.Marshal(ls.Map()) @@ -172,6 +188,44 @@ func (ls Labels) HashWithoutLabels(b []byte, names ...string) (uint64, []byte) { return xxhash.Sum64(b), b } +// WithLabels returns a new labels.Labels from ls that only contains labels matching names. +// 'names' have to be sorted in ascending order. +func (ls Labels) WithLabels(names ...string) Labels { + ret := make([]Label, 0, len(ls)) + + i, j := 0, 0 + for i < len(ls) && j < len(names) { + if names[j] < ls[i].Name { + j++ + } else if ls[i].Name < names[j] { + i++ + } else { + ret = append(ret, ls[i]) + i++ + j++ + } + } + return ret +} + +// WithLabels returns a new labels.Labels from ls that contains labels not matching names. +// 'names' have to be sorted in ascending order. +func (ls Labels) WithoutLabels(names ...string) Labels { + ret := make([]Label, 0, len(ls)) + + j := 0 + for i := range ls { + for j < len(names) && names[j] < ls[i].Name { + j++ + } + if ls[i].Name == MetricName || (j < len(names) && ls[i].Name == names[j]) { + continue + } + ret = append(ret, ls[i]) + } + return ret +} + // Copy returns a copy of the labels. func (ls Labels) Copy() Labels { res := make(Labels, len(ls)) diff --git a/promql/engine.go b/promql/engine.go index 2360f34ac..c3f000236 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -14,6 +14,7 @@ package promql import ( + "bytes" "container/heap" "context" "fmt" @@ -831,16 +832,20 @@ type EvalNodeHelper struct { // dropMetricName and label_*. dmn map[uint64]labels.Labels // signatureFunc. - sigf map[uint64]uint64 + sigf map[string]string // funcHistogramQuantile. - signatureToMetricWithBuckets map[uint64]*metricWithBuckets + signatureToMetricWithBuckets map[string]*metricWithBuckets // label_replace. regex *regexp.Regexp + lb *labels.Builder + lblBuf []byte + lblResultBuf []byte + // For binary vector matching. - rightSigs map[uint64]Sample - matchedSigs map[uint64]map[uint64]struct{} - resultMetric map[uint64]labels.Labels + rightSigs map[string]Sample + matchedSigs map[string]map[uint64]struct{} + resultMetric map[string]labels.Labels } // dropMetricName is a cached version of dropMetricName. @@ -858,20 +863,19 @@ func (enh *EvalNodeHelper) dropMetricName(l labels.Labels) labels.Labels { return ret } -// signatureFunc is a cached version of signatureFunc. -func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) uint64 { +func (enh *EvalNodeHelper) signatureFunc(on bool, names ...string) func(labels.Labels) string { if enh.sigf == nil { - enh.sigf = make(map[uint64]uint64, len(enh.out)) + enh.sigf = make(map[string]string, len(enh.out)) } - f := signatureFunc(on, names...) - return func(l labels.Labels) uint64 { - h := l.Hash() - ret, ok := enh.sigf[h] + f := signatureFunc(on, enh.lblBuf, names...) + return func(l labels.Labels) string { + enh.lblBuf = l.Bytes(enh.lblBuf) + ret, ok := enh.sigf[string(enh.lblBuf)] if ok { return ret } ret = f(l) - enh.sigf[h] = ret + enh.sigf[string(enh.lblBuf)] = ret return ret } } @@ -1527,7 +1531,7 @@ func (ev *evaluator) VectorAnd(lhs, rhs Vector, matching *parser.VectorMatching, sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) // The set of signatures for the right-hand side Vector. - rightSigs := map[uint64]struct{}{} + rightSigs := map[string]struct{}{} // Add all rhs samples to a map so we can easily find matches later. for _, rs := range rhs { rightSigs[sigf(rs.Metric)] = struct{}{} @@ -1548,7 +1552,7 @@ func (ev *evaluator) VectorOr(lhs, rhs Vector, matching *parser.VectorMatching, } sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) - leftSigs := map[uint64]struct{}{} + leftSigs := map[string]struct{}{} // Add everything from the left-hand-side Vector. for _, ls := range lhs { leftSigs[sigf(ls.Metric)] = struct{}{} @@ -1569,7 +1573,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi } sigf := enh.signatureFunc(matching.On, matching.MatchingLabels...) - rightSigs := map[uint64]struct{}{} + rightSigs := map[string]struct{}{} for _, rs := range rhs { rightSigs[sigf(rs.Metric)] = struct{}{} } @@ -1598,7 +1602,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * // All samples from the rhs hashed by the matching label/values. if enh.rightSigs == nil { - enh.rightSigs = make(map[uint64]Sample, len(enh.out)) + enh.rightSigs = make(map[string]Sample, len(enh.out)) } else { for k := range enh.rightSigs { delete(enh.rightSigs, k) @@ -1628,7 +1632,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * // Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one // the value is a set of signatures to detect duplicated result elements. if enh.matchedSigs == nil { - enh.matchedSigs = make(map[uint64]map[uint64]struct{}, len(rightSigs)) + enh.matchedSigs = make(map[string]map[uint64]struct{}, len(rightSigs)) } else { for k := range enh.matchedSigs { delete(enh.matchedSigs, k) @@ -1662,7 +1666,6 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * continue } metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh) - insertedSigs, exists := matchedSigs[sig] if matching.Card == parser.CardOneToOne { if exists { @@ -1692,19 +1695,15 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * return enh.out } -// signatureFunc returns a function that calculates the signature for a metric -// ignoring the provided labels. If on, then the given labels are only used instead. -func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 { +func signatureFunc(on bool, b []byte, names ...string) func(labels.Labels) string { sort.Strings(names) if on { - return func(lset labels.Labels) uint64 { - h, _ := lset.HashForLabels(make([]byte, 0, 1024), names...) - return h + return func(lset labels.Labels) string { + return string(lset.WithLabels(names...).Bytes(b)) } } - return func(lset labels.Labels) uint64 { - h, _ := lset.HashWithoutLabels(make([]byte, 0, 1024), names...) - return h + return func(lset labels.Labels) string { + return string(lset.WithoutLabels(names...).Bytes(b)) } } @@ -1712,22 +1711,29 @@ func signatureFunc(on bool, names ...string) func(labels.Labels) uint64 { // binary operation and the matching options. func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.VectorMatching, enh *EvalNodeHelper) labels.Labels { if enh.resultMetric == nil { - enh.resultMetric = make(map[uint64]labels.Labels, len(enh.out)) + enh.resultMetric = make(map[string]labels.Labels, len(enh.out)) } - // op and matching are always the same for a given node, so - // there's no need to include them in the hash key. - // If the lhs and rhs are the same then the xor would be 0, - // so add in one side to protect against that. - lh := lhs.Hash() - h := (lh ^ rhs.Hash()) + lh - if ret, ok := enh.resultMetric[h]; ok { + + if enh.lb == nil { + enh.lb = labels.NewBuilder(lhs) + } else { + enh.lb.Reset(lhs) + } + + buf := bytes.NewBuffer(enh.lblResultBuf[:0]) + enh.lblBuf = lhs.Bytes(enh.lblBuf) + buf.Write(enh.lblBuf) + enh.lblBuf = rhs.Bytes(enh.lblBuf) + buf.Write(enh.lblBuf) + enh.lblResultBuf = buf.Bytes() + + if ret, ok := enh.resultMetric[string(enh.lblResultBuf)]; ok { return ret } - - lb := labels.NewBuilder(lhs) + str := string(enh.lblResultBuf) if shouldDropMetricName(op) { - lb.Del(labels.MetricName) + enh.lb.Del(labels.MetricName) } if matching.Card == parser.CardOneToOne { @@ -1739,23 +1745,23 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V continue Outer } } - lb.Del(l.Name) + enh.lb.Del(l.Name) } } else { - lb.Del(matching.MatchingLabels...) + enh.lb.Del(matching.MatchingLabels...) } } for _, ln := range matching.Include { // Included labels from the `group_x` modifier are taken from the "one"-side. if v := rhs.Get(ln); v != "" { - lb.Set(ln, v) + enh.lb.Set(ln, v) } else { - lb.Del(ln) + enh.lb.Del(ln) } } - ret := lb.Labels() - enh.resultMetric[h] = ret + ret := enh.lb.Labels() + enh.resultMetric[str] = ret return ret } diff --git a/promql/functions.go b/promql/functions.go index 49efadaf3..454ff4fa5 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -598,10 +598,10 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNo func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector { q := vals[0].(Vector)[0].V inVec := vals[1].(Vector) - sigf := enh.signatureFunc(false, excludedLabels...) + sigf := signatureFunc(false, enh.lblBuf, excludedLabels...) if enh.signatureToMetricWithBuckets == nil { - enh.signatureToMetricWithBuckets = map[uint64]*metricWithBuckets{} + enh.signatureToMetricWithBuckets = map[string]*metricWithBuckets{} } else { for _, v := range enh.signatureToMetricWithBuckets { v.buckets = v.buckets[:0] @@ -616,16 +616,16 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev // TODO(beorn7): Issue a warning somehow. continue } - hash := sigf(el.Metric) + l := sigf(el.Metric) - mb, ok := enh.signatureToMetricWithBuckets[hash] + mb, ok := enh.signatureToMetricWithBuckets[l] if !ok { el.Metric = labels.NewBuilder(el.Metric). Del(labels.BucketLabel, labels.MetricName). Labels() mb = &metricWithBuckets{el.Metric, nil} - enh.signatureToMetricWithBuckets[hash] = mb + enh.signatureToMetricWithBuckets[l] = mb } mb.buckets = append(mb.buckets, bucket{upperBound, el.V}) } diff --git a/promql/testdata/collision.test b/promql/testdata/collision.test new file mode 100644 index 000000000..657c32f58 --- /dev/null +++ b/promql/testdata/collision.test @@ -0,0 +1,11 @@ + +load 1s + node_namespace_pod:kube_pod_info:{namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + node_cpu_seconds_total{cpu="10",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + node_cpu_seconds_total{cpu="35",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + node_cpu_seconds_total{cpu="89",endpoint="https",instance="10.253.57.87:9100",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v",service="node-exporter"} 449 + +eval instant at 4s count by(namespace, pod, cpu) (node_cpu_seconds_total{cpu=~".*",job="node-exporter",mode="idle",namespace="observability",pod="node-exporter-l454v"}) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{namespace="observability",pod="node-exporter-l454v"} + {cpu="10",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="35",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 + {cpu="89",namespace="observability",node="gke-search-infra-custom-96-253440-fli-d135b119-jx00",pod="node-exporter-l454v"} 1 \ No newline at end of file