Address various comments from the review

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
beorn7 2019-06-26 23:22:16 +02:00
parent 23c03207e9
commit e34af6d4d3
2 changed files with 18 additions and 18 deletions

View File

@ -87,9 +87,9 @@
{
alert: 'PrometheusTSDBReloadsFailing',
expr: |||
increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config,
'for': '12h',
'for': '4h',
labels: {
severity: 'warning',
},
@ -100,9 +100,9 @@
{
alert: 'PrometheusTSDBCompactionsFailing',
expr: |||
increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config,
'for': '12h',
'for': '4h',
labels: {
severity: 'warning',
},
@ -113,7 +113,7 @@
{
alert: 'PrometheusTSDBWALCorruptions',
expr: |||
tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
increase(tsdb_wal_corruptions_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config,
'for': '4h',
labels: {
@ -153,12 +153,12 @@
alert: 'PrometheusRemoteStorageFailures',
expr: |||
(
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m])
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
/
(
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m])
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
+
rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[1m])
rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[5m])
)
)
* 100
@ -192,10 +192,10 @@
},
{
alert: 'PrometheusRuleFailures',
'for': '15m',
expr: |||
rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[1m]) > 0
rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[5m]) > 0
||| % $._config,
'for': '15m',
labels: {
severity: 'critical',
},

View File

@ -7,7 +7,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
.addMultiTemplate('job', 'prometheus_build_info', 'job')
.addMultiTemplate('instance', 'prometheus_build_info', 'instance')
# Prometheus is quite commonly configured with honor_labels set to true;
# therefor job and instance is not the prometheus server in many queries!.
# therefore job and instance are not the prometheus server in many queries!
.addRow(
g.row('Prometheus Stats')
.addPanel(
@ -18,7 +18,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
], {
job: { alias: 'Job' },
instance: { alias: 'Instance' },
verstion: { alias: 'Version' },
version: { alias: 'Version' },
'Value #A': { alias: 'Count', type: 'hidden' },
'Value #B': { alias: 'Uptime' },
})
@ -28,20 +28,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Discovery')
.addPanel(
g.panel('Target Sync') +
g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[2m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
{ yaxes: g.yaxes('ms') }
)
.addPanel(
g.panel('Targets') +
g.queryPanel('count(up{})', 'Targets') +
g.queryPanel('sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', 'Targets') +
g.stack
)
)
.addRow(
g.row('Retrieval')
.addPanel(
g.panel('Target Scrape Duration') +
g.queryPanel('1e3 * sum(scrape_duration_seconds) / count(scrape_duration_seconds)', 'Average') +
g.panel('Average Scrape Interval Duration') +
g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{interval}} configured') +
{ yaxes: g.yaxes('ms') }
)
.addPanel(
@ -61,7 +61,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
)
.addPanel(
g.panel('Appended Samples') +
g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[1m])', '{{job}} {{instance}}') +
g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', '{{job}} {{instance}}') +
g.stack
)
)
@ -82,7 +82,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Query')
.addPanel(
g.panel('Query Rate') +
g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[1m])', '{{job}} {{instance}}') +
g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{job}} {{instance}}') +
g.stack,
)
.addPanel(