From 9d68e81b3279ab7c6fb031d5c1118b37670e7a1e Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Tue, 6 Dec 2016 15:52:50 +0100 Subject: [PATCH 1/2] web: don't return federation errors over HTTP We write federation responses as a stream, so once the first byte has been written the status header is fixed. We cannot return an HTTP error for an intermediate error; we should just abort and log it instead. --- web/federate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/federate.go b/web/federate.go index 960b55470..8957152ef 100644 --- a/web/federate.go +++ b/web/federate.go @@ -92,7 +92,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { // creating the new one. if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) + log.With("err", err).Error("federation failed") return } } @@ -133,7 +133,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { // Still have to ship off the last MetricFamily, if any. 
if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) + log.With("err", err).Error("federation failed") } } } From cef2e04aa3c97bf30d26b3968e05a22de17e8814 Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Tue, 6 Dec 2016 16:09:50 +0100 Subject: [PATCH 2/2] web: add error counter for federation responses --- web/federate.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/web/federate.go b/web/federate.go index 8957152ef..f910fbab3 100644 --- a/web/federate.go +++ b/web/federate.go @@ -18,6 +18,7 @@ import ( "sort" "github.com/golang/protobuf/proto" + "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/log" @@ -27,6 +28,13 @@ import ( "github.com/prometheus/prometheus/storage/metric" ) +var ( + federationErrors = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "prometheus_web_federation_errors_total", + Help: "Total number of errors that occurred while sending federation responses.", + }) +) + func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { h.mtx.RLock() defer h.mtx.RUnlock() @@ -52,6 +60,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { q, err := h.storage.Querier() if err != nil { + federationErrors.Inc() http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -59,6 +68,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { vector, err := q.LastSampleForLabelMatchers(h.context, minTimestamp, matcherSets...) if err != nil { + federationErrors.Inc() http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -92,6 +102,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { // creating the new one. 
if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { + federationErrors.Inc() log.With("err", err).Error("federation failed") return } @@ -133,6 +144,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { // Still have to ship off the last MetricFamily, if any. if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { + federationErrors.Inc() log.With("err", err).Error("federation failed") } }