From a878460962018702a10cd21a69a4792243ad63e2 Mon Sep 17 00:00:00 2001
From: Vallari Agrawal
Date: Wed, 30 Oct 2024 19:34:01 +0530
Subject: [PATCH] monitoring: add tests for 2 new nvmeof alerts

Add test for alerts NVMeoFMissingListener and NVMeoFZeroListenerSubsystem
to test_alerts.yml.

Signed-off-by: Vallari Agrawal
---
 .../ceph-mixin/tests_alerts/test_alerts.yml | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml
index a269ff74227..6bcaa53b851 100644
--- a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml
+++ b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml
@@ -2522,6 +2522,75 @@ tests:
             exp_annotations:
               summary: "The number of clients connected to nqn1 is too high on cluster mycluster"
               description: "The supported limit for clients connecting to a subsystem is 32"
+
+  # NVMeoFMissingListener: nqn1 has 0 listeners on node-1 but 1 each on node-2 and node-3
+  - interval: 1m  # spacing between consecutive samples of every input series
+    input_series:
+      - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-1:9100"}'
+        values: '0 0 0 0 0 0 0 0 0 0 0'  # 11 samples (t=0m..10m); the gateway without a listener
+      - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-2:9100"}'
+        values: '1 1 1 1 1 1 1 1 1 1 1'  # 11 samples (t=0m..10m)
+      - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-3:9100"}'
+        values: '1 1 1 1 1 1 1 1 1 1 1'  # 11 samples (t=0m..10m)
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1", instance="node-1:9100"}'
+        values: '1+0x20'  # expanding notation: 21 samples of 1 (t=0m..20m)
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2", instance="node-2:9100"}'
+        values: '1+0x20'
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3", instance="node-3:9100"}'
+        values: '1+0x20'
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4", instance="node-4:9100"}'
+        values: '1+0x20'  # node-4 has gateway_info but no listener_count series in this test
+    promql_expr_test:  # the expression below reads only ceph_nvmeof_subsystem_listener_count
+      - expr: ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0
+        eval_time: 1m
+        exp_samples:  # only node-1 is 0 while the nqn1 sum (0+1+1) is > 0
+          - labels: '{__name__="ceph_nvmeof_subsystem_listener_count", instance="node-1:9100", nqn="nqn1"}'
+            value: 0
+    alert_rule_test:  # the alert rule itself is not part of this patch
+      - eval_time: 10m  # time of the last listener_count input sample
+        alertname: NVMeoFMissingListener
+        exp_alerts:
+          - exp_labels:
+              instance: node-1:9100  # same instance/nqn labels as the zero-valued input series
+              nqn: nqn1
+              severity: warning  # presumably set by the rule definition - confirm there
+              type: ceph_default
+            exp_annotations:
+              summary: "No listener added for node-1:9100 NVMe-oF Gateway to nqn1 subsystem"
+              description: "For every subsystem, each gateway should have a listener to balance traffic between gateways."
+
+  # NVMeoFZeroListenerSubsystem: nqn1 stays at 0 listeners, nqn2 gains listeners from t=1m
+  - interval: 1m  # spacing between consecutive samples of every input series
+    input_series:
+      - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1"}'
+        values: '0 0 0 0 0 0 0 0'  # 8 samples (t=0m..7m), always 0
+      - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn2"}'
+        values: '0 1 1 1 2 2 3 4'  # 8 samples (t=0m..7m), non-zero from t=1m
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1"}'
+        values: '1+0x20'  # expanding notation: 21 samples of 1 (t=0m..20m)
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2"}'
+        values: '1+0x20'
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3"}'
+        values: '1+0x20'
+      - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4"}'
+        values: '1+0x20'
+    promql_expr_test:  # the expression below reads only ceph_nvmeof_subsystem_listener_count
+      - expr: ceph_nvmeof_subsystem_listener_count == 0
+        eval_time: 1m
+        exp_samples:  # at t=1m nqn2 is already 1, so only nqn1 matches
+          - labels: '{__name__="ceph_nvmeof_subsystem_listener_count",nqn="nqn1"}'
+            value: 0
+    alert_rule_test:  # the alert rule itself is not part of this patch
+      - eval_time: 10m  # NOTE(review): listener_count inputs end at t=7m; presumably still visible via lookback - confirm
+        alertname: NVMeoFZeroListenerSubsystem
+        exp_alerts:
+          - exp_labels:
+              nqn: nqn1  # no instance label: the input series in this test carry none
+              severity: warning  # presumably set by the rule definition - confirm there
+              type: ceph_default
+            exp_annotations:
+              summary: "No listeners added to nqn1 subsystem"
+              description: "NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners."  # NOTE(review): "have" looks like a grammar slip; presumably mirrors the rule text - fix both together
 
   # NVMeoFHighHostCPU
   - interval: 1m