Merge pull request #449 from prometheus/beorn7/fix-partitioning-by-outcome
Adjust the partitioning by outcome.
commit 7ca29308e6
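In short, the commit stops partitioning latency summaries by a result label and gives each outcome its own metric: a plain Summary observes the latency of attempted sends, while dedicated Counters count errors and dropped items. The sketch below contrasts the two styles using the prometheus/client_golang constructors that appear in the diff; the example_* metric names, the recordSend helper, and the main function are illustrative placeholders, not code from this commit.

package main

import (
	"errors"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// Old style: one SummaryVec partitioned by a "result" label. Every outcome
// (success, failure, dropped) shares a single metric family, and dropped work
// has to be faked as a zero-valued latency observation.
var sendLatencyByOutcome = prometheus.NewSummaryVec(
	prometheus.SummaryOpts{
		Name: "example_send_latency_by_outcome_milliseconds",
		Help: "Send latency, partitioned by outcome.",
	},
	[]string{"result"},
)

// New style: a plain Summary for the latency of attempted sends, plus
// dedicated counters for errors and for items dropped before sending.
var (
	sendLatency = prometheus.NewSummary(prometheus.SummaryOpts{
		Name: "example_send_latency_milliseconds",
		Help: "Latency of attempted sends (dropped items are not observed).",
	})
	sendErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_send_errors_total",
		Help: "Total number of failed send attempts.",
	})
	sendDropped = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_send_dropped_total",
		Help: "Total number of items dropped before a send was attempted.",
	})
)

// recordSend mirrors the loop bodies in the diff below: latency is observed
// only when a send is actually attempted; failures and drops are counted.
func recordSend(configured bool, send func() error) {
	if !configured {
		sendDropped.Inc()
		return
	}
	begin := time.Now()
	if err := send(); err != nil {
		sendErrors.Inc()
	}
	sendLatency.Observe(float64(time.Since(begin) / time.Millisecond))
}

func main() {
	prometheus.MustRegister(sendLatency, sendErrors, sendDropped)

	// Old style: the outcome rides along as a label value on the summary.
	sendLatencyByOutcome.WithLabelValues("dropped").Observe(0)

	// New style: each outcome has its own metric; only real sends are timed.
	recordSend(true, func() error { return errors.New("upstream unavailable") })
}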
@@ -39,11 +39,6 @@ const (
 const (
 	namespace = "prometheus"
 	subsystem = "notifications"
-
-	result  = "result"
-	success = "success"
-	failure = "failure"
-	dropped = "dropped"
 )
 
 var (
@@ -88,7 +83,9 @@ type NotificationHandler struct {
 	// HTTP client with custom timeout settings.
 	httpClient httpPoster
 
-	notificationLatency        *prometheus.SummaryVec
+	notificationLatency        prometheus.Summary
+	notificationErrors         prometheus.Counter
+	notificationDropped        prometheus.Counter
 	notificationsQueueLength   prometheus.Gauge
 	notificationsQueueCapacity prometheus.Metric
 
@@ -103,15 +100,24 @@ func NewNotificationHandler(alertmanagerURL string, notificationQueueCapacity in
 
 		httpClient: utility.NewDeadlineClient(*deadline),
 
-		notificationLatency: prometheus.NewSummaryVec(
-			prometheus.SummaryOpts{
-				Namespace: namespace,
-				Subsystem: subsystem,
-				Name:      "latency_milliseconds",
-				Help:      "Latency quantiles for sending alert notifications.",
-			},
-			[]string{result},
-		),
+		notificationLatency: prometheus.NewSummary(prometheus.SummaryOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "latency_milliseconds",
+			Help:      "Latency quantiles for sending alert notifications (not including dropped notifications).",
+		}),
+		notificationErrors: prometheus.NewCounter(prometheus.CounterOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "errors_total",
+			Help:      "Total number of errors sending alert notifications.",
+		}),
+		notificationDropped: prometheus.NewCounter(prometheus.CounterOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "dropped_total",
+			Help:      "Total number of alert notifications dropped due to alert manager missing in configuration.",
+		}),
 		notificationsQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
@@ -175,22 +181,19 @@ func (n *NotificationHandler) Run() {
 	for reqs := range n.pendingNotifications {
 		if n.alertmanagerURL == "" {
 			glog.Warning("No alert manager configured, not dispatching notification")
-			n.notificationLatency.WithLabelValues(dropped).Observe(0)
+			n.notificationDropped.Inc()
 			continue
 		}
 
 		begin := time.Now()
 		err := n.sendNotifications(reqs)
-		labelValue := success
 
 		if err != nil {
 			glog.Error("Error sending notification: ", err)
-			labelValue = failure
+			n.notificationErrors.Inc()
 		}
 
-		n.notificationLatency.WithLabelValues(labelValue).Observe(
-			float64(time.Since(begin) / time.Millisecond),
-		)
+		n.notificationLatency.Observe(float64(time.Since(begin) / time.Millisecond))
 	}
 	close(n.stopped)
 }
@@ -59,7 +59,8 @@ type TSDBQueueManager struct {
 	drained chan bool
 
 	samplesCount  *prometheus.CounterVec
-	sendLatency   *prometheus.SummaryVec
+	sendLatency   prometheus.Summary
+	sendErrors    prometheus.Counter
 	queueLength   prometheus.Gauge
 	queueCapacity prometheus.Metric
 }
@@ -81,15 +82,18 @@ func NewTSDBQueueManager(tsdb TSDBClient, queueCapacity int) *TSDBQueueManager {
 			},
 			[]string{result},
 		),
-		sendLatency: prometheus.NewSummaryVec(
-			prometheus.SummaryOpts{
-				Namespace: namespace,
-				Subsystem: subsystem,
-				Name:      "sent_latency_milliseconds",
-				Help:      "Latency quantiles for sending samples to the remote TSDB.",
-			},
-			[]string{result},
-		),
+		sendLatency: prometheus.NewSummary(prometheus.SummaryOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "sent_latency_milliseconds",
+			Help:      "Latency quantiles for sending sample batches to the remote TSDB.",
+		}),
+		sendErrors: prometheus.NewCounter(prometheus.CounterOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "sent_errors_total",
+			Help:      "Total number of errors sending sample batches to the remote TSDB.",
+		}),
 		queueLength: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: namespace,
 			Subsystem: subsystem,
@@ -164,9 +168,10 @@ func (t *TSDBQueueManager) sendSamples(s clientmodel.Samples) {
 	if err != nil {
 		glog.Warningf("error sending %d samples to TSDB: %s", len(s), err)
 		labelValue = failure
+		t.sendErrors.Inc()
 	}
 	t.samplesCount.WithLabelValues(labelValue).Add(float64(len(s)))
-	t.sendLatency.WithLabelValues(labelValue).Observe(float64(duration))
+	t.sendLatency.Observe(float64(duration))
 }
 
 // Run continuously sends samples to the TSDB.
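A practical consequence, assuming the usual namespace_subsystem_name metric naming: error and drop rates can now be read straight from the counters (prometheus_notifications_errors_total, prometheus_notifications_dropped_total) instead of being inferred from result="failure" or result="dropped" partitions of the latency summary. Dropped notifications also no longer contribute zero-valued observations to the latency quantiles, which is what the updated Help string calls out.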