mirror of
https://github.com/ceph/ceph
synced 2025-02-21 09:57:26 +00:00
Merge pull request #45583 from p-se/monitoring-alert-mtu-group-by-devices
mgr/dashboard: Compare values of MTU alert by device Reviewed-by: Aashish Sharma <aasharma@redhat.com> Reviewed-by: Ernesto Puerta <epuertat@redhat.com> Reviewed-by: Nizamudeen A <nia@redhat.com> Reviewed-by: p-se <NOT@FOUND>
This commit is contained in:
commit
2d1c480f5a
@ -704,7 +704,18 @@ groups:
|
||||
rate of the past 48 hours.
|
||||
|
||||
- alert: CephNodeInconsistentMTU
|
||||
expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"}))
|
||||
expr: |
|
||||
node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
|
||||
scalar(
|
||||
max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
|
||||
quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
|
||||
)
|
||||
or
|
||||
node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
|
||||
scalar(
|
||||
min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
|
||||
quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
|
||||
)
|
||||
labels:
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
@ -712,7 +723,7 @@ groups:
|
||||
summary: MTU settings across Ceph hosts are inconsistent
|
||||
description: >
|
||||
Node {{ $labels.instance }} has a different MTU size ({{ $value }})
|
||||
than the median value on device {{ $labels.device }}.
|
||||
than the median of devices named {{ $labels.device }}.
|
||||
|
||||
- name: pools
|
||||
rules:
|
||||
|
@ -546,6 +546,12 @@ tests:
|
||||
- series: 'node_network_mtu_bytes{device="eth4",instance="node-exporter",
|
||||
job="node-exporter"}'
|
||||
values: '9000 9000 9000 9000 9000'
|
||||
- series: 'node_network_mtu_bytes{device="eth4",instance="hostname1",
|
||||
job="node-exporter"}'
|
||||
values: '2200 2200 2200 2200 2200'
|
||||
- series: 'node_network_mtu_bytes{device="eth4",instance="hostname2",
|
||||
job="node-exporter"}'
|
||||
values: '2400 2400 2400 2400 2400'
|
||||
- series: 'node_network_up{device="eth0",instance="node-exporter",
|
||||
job="node-exporter"}'
|
||||
values: '0 0 0 0 0'
|
||||
@ -557,21 +563,50 @@ tests:
|
||||
values: '1 1 1 1 1'
|
||||
- series: 'node_network_up{device="eth3",instance="node-exporter",
|
||||
job="node-exporter"}'
|
||||
values: '0 0 0 0 0'
|
||||
values: '1 1 1 1 1'
|
||||
- series: 'node_network_up{device="eth4",instance="node-exporter",
|
||||
job="node-exporter"}'
|
||||
values: '1 1 1 1 1'
|
||||
- series: 'node_network_up{device="eth4",instance="hostname1",
|
||||
job="node-exporter"}'
|
||||
values: '1 1 1 1 1'
|
||||
- series: 'node_network_up{device="eth4",instance="hostname2",
|
||||
job="node-exporter"}'
|
||||
values: '0 0 0 0 0'
|
||||
promql_expr_test:
|
||||
- expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left()
|
||||
(quantile(0.5, node_network_mtu_bytes{device!="lo"}))
|
||||
- expr: |
|
||||
node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
|
||||
scalar(
|
||||
max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
|
||||
quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
|
||||
)
|
||||
or
|
||||
node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
|
||||
scalar(
|
||||
min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
|
||||
quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
|
||||
)
|
||||
eval_time: 1m
|
||||
exp_samples:
|
||||
- labels: '{device="eth4", instance="node-exporter", job="node-exporter"}'
|
||||
value: 9000
|
||||
- labels: '{device="eth4", instance="hostname1", job="node-exporter"}'
|
||||
value: 2200
|
||||
alert_rule_test:
|
||||
- eval_time: 1m
|
||||
alertname: CephNodeInconsistentMTU
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
device: eth4
|
||||
instance: hostname1
|
||||
job: node-exporter
|
||||
severity: warning
|
||||
type: ceph_default
|
||||
exp_annotations:
|
||||
summary: MTU settings across Ceph hosts are inconsistent
|
||||
description: >
|
||||
Node hostname1 has a different MTU size (2200)
|
||||
than the median of devices named eth4.
|
||||
- exp_labels:
|
||||
device: eth4
|
||||
instance: node-exporter
|
||||
@ -582,7 +617,7 @@ tests:
|
||||
summary: MTU settings across Ceph hosts are inconsistent
|
||||
description: >
|
||||
Node node-exporter has a different MTU size (9000)
|
||||
than the median value on device eth4.
|
||||
than the median of devices named eth4.
|
||||
|
||||
# pool full, data series has 6 but using topk(5) so to ensure the
|
||||
# results are working as expected
|
||||
|
Loading…
Reference in New Issue
Block a user