monitoring: add 2 nvmeof alerts to prometheus_alerts.yaml

- `NVMeoFMissingListener`: trigger if a gateway has no listener for a
     subsystem that already has listeners on other gateways
- `NVMeoFZeroListenerSubsystem`: trigger if a subsystem has no listeners

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
This commit is contained in:
Vallari Agrawal 2024-10-09 02:37:48 +05:30 committed by Vallari Agrawal
parent 505f1a67a9
commit f02e312844
No known key found for this signature in database
GPG Key ID: 83EC142692896009
1 changed files with 18 additions and 0 deletions

View File

@ -837,6 +837,24 @@ groups:
labels:
severity: "warning"
type: "ceph_default"
# NVMeoFMissingListener: warns when one series reports zero listeners for a
# subsystem (nqn) even though that subsystem has listeners overall.
# The condition must hold for 10 minutes before the alert fires.
- alert: "NVMeoFMissingListener"
  annotations:
    description: "For every subsystem, each gateway should have a listener to balance traffic between gateways."
    summary: "No listener added for {{ $labels.instance }} NVMe-oF Gateway to {{ $labels.nqn }} subsystem"
  # Left operand: individual series whose listener count is exactly 0.
  # Right operand: per-nqn totals greater than 0, matched on the nqn label
  # only. A subsystem with no listeners at all therefore does not match.
  # Folded scalar (>-): the lines below join with single spaces into one
  # PromQL expression, with no trailing newline.
  expr: >-
    ceph_nvmeof_subsystem_listener_count == 0
    and on(nqn)
    sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0
  for: "10m"
  labels:
    severity: "warning"
    type: "ceph_default"
# NVMeoFZeroListenerSubsystem: warns when the listener count summed over
# every series of a subsystem (grouped by nqn) is zero.
# The condition must hold for 10 minutes before the alert fires.
- alert: "NVMeoFZeroListenerSubsystem"
  annotations:
    description: "NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners."
    summary: "No listeners added to {{ $labels.nqn }} subsystem"
  # Aggregating by nqn leaves nqn as the only label on the result, which is
  # why the summary references $labels.nqn and nothing else.
  expr: "sum(ceph_nvmeof_subsystem_listener_count) by (nqn) == 0"
  for: "10m"
  labels:
    severity: "warning"
    type: "ceph_default"
- alert: "NVMeoFHighHostCPU"
annotations:
description: "High CPU on a gateway host can lead to CPU contention and performance degradation"