Merge pull request #55710 from rhcs-dashboard/nvmeof-prometheus-endpoint

cephadm/nvmeof: scrape nvmeof prometheus endpoint

Reviewed-by: Adam King <adking@redhat.com>
Reviewed-by: Paul Cuzner <pcuzner@ibm.com>
This commit is contained in:
Adam King 2024-02-28 15:10:21 -05:00 committed by GitHub
commit 1bf6b32246
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 83 additions and 3 deletions

View File

@ -12,7 +12,7 @@ DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.12'
DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'

View File

@ -117,7 +117,7 @@ os._exit = os_exit_noop # type: ignore
DEFAULT_IMAGE = 'quay.io/ceph/ceph'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'

View File

@ -19,6 +19,7 @@ import secrets
from cephadm.services.ingress import IngressSpec
from cephadm.ssl_cert_utils import SSLCerts
from cephadm.services.cephadmservice import CephExporterService
from cephadm.services.nvmeof import NvmeofService
if TYPE_CHECKING:
from cephadm.module import CephadmOrchestrator
@ -145,6 +146,7 @@ class Root(Server):
<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
<p><a href='prometheus/rules'>Prometheus rules</a></p>
</body>
</html>'''
@ -163,6 +165,8 @@ class Root(Server):
return self.haproxy_sd_config()
elif service == 'ceph-exporter':
return self.ceph_exporter_sd_config()
elif service == 'nvmeof':
return self.nvmeof_sd_config()
else:
return []
@ -231,6 +235,19 @@ class Root(Server):
})
return srv_entries
def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
    """Build the prometheus <http_sd_config> entries for the nvmeof daemons.

    One entry is produced per daemon of type 'nvmeof' found in the mgr
    cache. Each entry carries a single target (address plus
    NvmeofService.PROMETHEUS_PORT) and an 'instance' label set to the
    daemon's hostname.
    """
    entries = []
    for daemon in self.mgr.cache.get_daemons_by_type('nvmeof'):
        assert daemon.hostname is not None
        # Use the daemon's own IP when it has one, otherwise look the
        # host address up in the inventory.
        if daemon.ip:
            host = daemon.ip
        else:
            host = self.mgr.inventory.get_addr(daemon.hostname)
        exporter_port = NvmeofService.PROMETHEUS_PORT
        # Strip any leading '/' from the built URL before using it as a target.
        target = build_url(host=host, port=exporter_port).lstrip('/')
        labels = {'instance': daemon.hostname}
        entries.append({'targets': [target], 'labels': labels})
    return entries
@cherrypy.expose(alias='prometheus/rules')
def get_prometheus_rules(self) -> str:
"""Return currently configured prometheus rules as Yaml."""

View File

@ -402,6 +402,7 @@ class PrometheusService(CephadmService):
haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included
ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included
nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@ -417,7 +418,8 @@ class PrometheusService(CephadmService):
'node_exporter_sd_url': node_exporter_sd_url,
'alertmanager_sd_url': alertmanager_sd_url,
'haproxy_sd_url': haproxy_sd_url,
'ceph_exporter_sd_url': ceph_exporter_sd_url
'ceph_exporter_sd_url': ceph_exporter_sd_url,
'nvmeof_sd_url': nvmeof_sd_url,
}
web_context = {

View File

@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
class NvmeofService(CephService):
TYPE = 'nvmeof'
PROMETHEUS_PORT = 10008
def config(self, spec: NvmeofServiceSpec) -> None: # type: ignore
assert self.TYPE == spec.service_type

View File

@ -10,6 +10,9 @@ state_update_interval_sec = 5
min_controller_id = {{ spec.min_controller_id }}
max_controller_id = {{ spec.max_controller_id }}
enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }}
enable_prometheus_exporter = True
prometheus_exporter_ssl = False
prometheus_port = 10008
[ceph]
pool = {{ spec.pool }}

View File

@ -107,3 +107,23 @@ scrape_configs:
- url: {{ ceph_exporter_sd_url }}
{% endif %}
{% endif %}
{% if nvmeof_sd_url %}
- job_name: 'nvmeof'
{% if secure_monitoring_stack %}
honor_labels: true
scheme: https
tls_config:
ca_file: root_cert.pem
http_sd_configs:
- url: {{ nvmeof_sd_url }}
basic_auth:
username: {{ service_discovery_username }}
password: {{ service_discovery_password }}
tls_config:
ca_file: root_cert.pem
{% else %}
http_sd_configs:
- url: {{ nvmeof_sd_url }}
{% endif %}
{% endif %}

View File

@ -19,6 +19,9 @@ class FakeCache:
if service_type == 'ceph-exporter':
return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
if service_type == 'nvmeof':
return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
@ -171,6 +174,20 @@ class TestServiceDiscovery:
# check content
assert cfg[0]['targets'] == ['1.2.3.4:9926']
def test_get_sd_config_nvmeof(self):
    """Check the service-discovery config returned for the 'nvmeof' service."""
    root = Root(FakeMgr(), 5000, '0.0.0.0')
    sd_entries = root.get_sd_config('nvmeof')

    # The response must be non-empty and every entry must expose both keys.
    assert sd_entries
    for item in sd_entries:
        assert 'labels' in item
        assert 'targets' in item

    # The first entry must point at the first fake daemon's address and port.
    first_targets = sd_entries[0]['targets']
    assert first_targets == ['1.2.3.4:10008']
def test_get_sd_config_invalid_service(self):
mgr = FakeMgr()
root = Root(mgr, 5000, '0.0.0.0')

View File

@ -393,6 +393,9 @@ state_update_interval_sec = 5
min_controller_id = 1
max_controller_id = 65519
enable_spdk_discovery_controller = False
enable_prometheus_exporter = True
prometheus_exporter_ssl = False
prometheus_port = 10008
[ceph]
pool = {pool}
@ -716,6 +719,10 @@ class TestMonitoring:
honor_labels: true
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
- job_name: 'nvmeof'
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
""").lstrip()
_run_cephadm.assert_called_with(
@ -872,6 +879,19 @@ class TestMonitoring:
password: sd_password
tls_config:
ca_file: root_cert.pem
- job_name: 'nvmeof'
honor_labels: true
scheme: https
tls_config:
ca_file: root_cert.pem
http_sd_configs:
- url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
basic_auth:
username: sd_user
password: sd_password
tls_config:
ca_file: root_cert.pem
""").lstrip()
_run_cephadm.assert_called_with(