monitoring: add tests for 2 new nvmeof alerts

Add tests for the NVMeoFMissingListener and
NVMeoFZeroListenerSubsystem alerts to test_alerts.yml.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
This commit is contained in:
Vallari Agrawal 2024-10-30 19:34:01 +05:30
parent 7994fea436
commit a878460962
No known key found for this signature in database
GPG Key ID: 83EC142692896009
1 changed file with 69 additions and 0 deletions

View File

@ -2522,6 +2522,75 @@ tests:
exp_annotations: exp_annotations:
summary: "The number of clients connected to nqn1 is too high on cluster mycluster" summary: "The number of clients connected to nqn1 is too high on cluster mycluster"
description: "The supported limit for clients connecting to a subsystem is 32" description: "The supported limit for clients connecting to a subsystem is 32"
# NVMeoFMissingListener
# Scenario: subsystem nqn1 is served by several gateways. node-2 and node-3
# each report one listener, while node-1 reports zero for the whole run.
# Only the node-1 series is expected to be returned and alerted on.
- interval: 1m
  input_series:
    # Per-gateway listener counts for nqn1: 11 samples, i.e. 0m through 10m.
    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-1:9100"}'
      values: '0 0 0 0 0 0 0 0 0 0 0'
    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-2:9100"}'
      values: '1 1 1 1 1 1 1 1 1 1 1'
    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-3:9100"}'
      values: '1 1 1 1 1 1 1 1 1 1 1'
    # Gateway info series, constant 1 for 21 samples. The expression tested
    # below does not reference them.
    # NOTE(review): presumably kept for parity with the neighbouring NVMe-oF
    # tests - confirm whether the alert rule needs them.
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1", instance="node-1:9100"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2", instance="node-2:9100"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3", instance="node-3:9100"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4", instance="node-4:9100"}'
      values: '1+0x20'
  promql_expr_test:
    # A gateway with zero listeners is only reported when the subsystem has at
    # least one listener in total (the "and on(nqn) sum(...) > 0" half).
    - eval_time: 1m
      expr: 'ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0'
      exp_samples:
        - labels: '{__name__="ceph_nvmeof_subsystem_listener_count", instance="node-1:9100", nqn="nqn1"}'
          value: 0
  alert_rule_test:
    - alertname: NVMeoFMissingListener
      eval_time: 10m
      exp_alerts:
        - exp_labels:
            instance: 'node-1:9100'
            nqn: nqn1
            severity: warning
            type: ceph_default
          exp_annotations:
            summary: "No listener added for node-1:9100 NVMe-oF Gateway to nqn1 subsystem"
            description: "For every subsystem, each gateway should have a listener to balance traffic between gateways."
# NVMeoFZeroListenerSubsystem
# Scenario: subsystem nqn1 reports zero listeners for the whole run, while
# nqn2 gains listeners from the second sample onwards. Only nqn1 is expected
# to be returned and alerted on.
- interval: 1m
  input_series:
    # Both listener-count series carry 11 samples (0m through 10m) so that the
    # alert evaluation at 10m reads a real sample instead of relying on the
    # query lookback window to carry forward an older one.
    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1"}'
      values: '0 0 0 0 0 0 0 0 0 0 0'
    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn2"}'
      values: '0 1 1 1 2 2 3 4 4 4 4'
    # Gateway info series, constant 1 for 21 samples. The expression tested
    # below does not reference them.
    # NOTE(review): presumably kept for parity with the neighbouring NVMe-oF
    # tests - confirm whether the alert rule needs them.
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3"}'
      values: '1+0x20'
    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4"}'
      values: '1+0x20'
  promql_expr_test:
    # At 1m nqn2 already has one listener, so nqn1 is the only match.
    - expr: 'ceph_nvmeof_subsystem_listener_count == 0'
      eval_time: 1m
      exp_samples:
        - labels: '{__name__="ceph_nvmeof_subsystem_listener_count",nqn="nqn1"}'
          value: 0
  alert_rule_test:
    - eval_time: 10m
      alertname: NVMeoFZeroListenerSubsystem
      exp_alerts:
        - exp_labels:
            nqn: nqn1
            severity: warning
            type: ceph_default
          exp_annotations:
            summary: "No listeners added to nqn1 subsystem"
            # Text must match the alert rule's annotation verbatim.
            description: "NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners."
# NVMeoFHighHostCPU # NVMeoFHighHostCPU
- interval: 1m - interval: 1m