mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
mgr/cephadm: config service_url in Dashboard for Prometheus and AlertManager
Calling Dashboard's CLI to set service URLs after deploying Prometheus and AlertManager Daemons. Fixes: https://tracker.ceph.com/issues/45625 Signed-off-by: Kiefer Chang <kiefer.chang@suse.com>
This commit is contained in:
parent
7afd5b3d2f
commit
a6135429b9
@ -1,6 +1,7 @@
|
||||
import json
|
||||
import errno
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from threading import Event
|
||||
from functools import wraps
|
||||
|
||||
@ -30,7 +31,7 @@ from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpe
|
||||
from . import remotes
|
||||
from . import utils
|
||||
from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
|
||||
RbdMirrorService, CrashService
|
||||
RbdMirrorService, CrashService, CephadmService
|
||||
from .services.iscsi import IscsiService
|
||||
from .services.nfs import NFSService
|
||||
from .services.osd import RemoveUtil, OSDRemoval, OSDService
|
||||
@ -338,6 +339,21 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
self.node_exporter_service = NodeExporterService(self)
|
||||
self.crash_service = CrashService(self)
|
||||
self.iscsi_service = IscsiService(self)
|
||||
self.cephadm_services = {
|
||||
'mon': self.mon_service,
|
||||
'mgr': self.mgr_service,
|
||||
'osd': self.osd_service,
|
||||
'mds': self.mds_service,
|
||||
'rgw': self.rgw_service,
|
||||
'rbd-mirror': self.rbd_mirror_service,
|
||||
'nfs': self.nfs_service,
|
||||
'grafana': self.grafana_service,
|
||||
'alertmanager': self.alertmanager_service,
|
||||
'prometheus': self.prometheus_service,
|
||||
'node-exporter': self.node_exporter_service,
|
||||
'crash': self.crash_service,
|
||||
'iscsi': self.iscsi_service,
|
||||
}
|
||||
|
||||
def shutdown(self):
|
||||
self.log.debug('shutdown')
|
||||
@ -346,6 +362,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
self.run = False
|
||||
self.event.set()
|
||||
|
||||
def _get_cephadm_service(self, service_type: str) -> CephadmService:
|
||||
assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES
|
||||
return self.cephadm_services[service_type]
|
||||
|
||||
def _kick_serve_loop(self):
|
||||
self.log.debug('_kick_serve_loop')
|
||||
self.event.set()
|
||||
@ -1858,8 +1878,7 @@ you may want to run:
|
||||
last_monmap = None # just in case clocks are skewed
|
||||
|
||||
daemons = self.cache.get_daemons()
|
||||
grafanas = [] # type: List[orchestrator.DaemonDescription]
|
||||
iscsi_daemons = []
|
||||
daemons_post = defaultdict(list)
|
||||
for dd in daemons:
|
||||
# orphan?
|
||||
spec = self.spec_store.specs.get(dd.service_name(), None)
|
||||
@ -1873,12 +1892,10 @@ you may want to run:
|
||||
if spec and spec.unmanaged:
|
||||
continue
|
||||
|
||||
# dependencies?
|
||||
if dd.daemon_type == 'grafana':
|
||||
# put running instances at the front of the list
|
||||
grafanas.insert(0, dd)
|
||||
elif dd.daemon_type == 'iscsi':
|
||||
iscsi_daemons.append(dd)
|
||||
# These daemon types require additional configs after creation
|
||||
if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']:
|
||||
daemons_post[dd.daemon_type].append(dd)
|
||||
|
||||
deps = self._calc_daemon_deps(dd.daemon_type, dd.daemon_id)
|
||||
last_deps, last_config = self.cache.get_daemon_last_config_deps(
|
||||
dd.hostname, dd.name())
|
||||
@ -1904,10 +1921,9 @@ you may want to run:
|
||||
self._create_daemon(dd.daemon_type, dd.daemon_id,
|
||||
dd.hostname, reconfig=True)
|
||||
|
||||
if grafanas:
|
||||
self.grafana_service.daemon_check_post(grafanas)
|
||||
if iscsi_daemons:
|
||||
self.iscsi_service.daemon_check_post(iscsi_daemons)
|
||||
# do daemon post actions
|
||||
for daemon_type, daemon_descs in daemons_post.items():
|
||||
self._get_cephadm_service(daemon_type).daemon_check_post(daemon_descs)
|
||||
|
||||
def _add_daemon(self, daemon_type, spec,
|
||||
create_func: Callable[..., T], config_func=None) -> List[T]:
|
||||
|
@ -1,6 +1,8 @@
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from mgr_module import MonCommandFailed
|
||||
|
||||
from ceph.deployment.service_spec import ServiceSpec, RGWSpec
|
||||
from orchestrator import OrchestratorError, DaemonDescription
|
||||
from cephadm import utils
|
||||
@ -22,6 +24,38 @@ class CephadmService:
|
||||
"""The post actions needed to be done after daemons are checked"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
|
||||
raise NotImplementedError()
|
||||
|
||||
def _inventory_get_addr(self, hostname: str):
|
||||
"""Get a host's address with its hostname."""
|
||||
return self.mgr.inventory.get_addr(hostname)
|
||||
|
||||
def _set_service_url_on_dashboard(self,
|
||||
service_name: str,
|
||||
get_mon_cmd: str,
|
||||
set_mon_cmd: str,
|
||||
service_url: str):
|
||||
"""A helper to get and set service_url via Dashboard's MON command."""
|
||||
try:
|
||||
_, out, _ = self.mgr.check_mon_command({
|
||||
'prefix': get_mon_cmd
|
||||
})
|
||||
except MonCommandFailed as e:
|
||||
logger.warning('Failed to get service URL for %s: %s', service_name, e)
|
||||
return
|
||||
if out.strip() != service_url:
|
||||
try:
|
||||
logger.info(
|
||||
'Setting service URL %s for %s in the Dashboard', service_url, service_name)
|
||||
_, out, _ = self.mgr.check_mon_command({
|
||||
'prefix': set_mon_cmd,
|
||||
'value': service_url,
|
||||
})
|
||||
except MonCommandFailed as e:
|
||||
logger.warning('Failed to set service URL %s for %s in the Dashboard: %s',
|
||||
service_url, service_name, e)
|
||||
|
||||
|
||||
class MonService(CephadmService):
|
||||
def create(self, name, host, network):
|
||||
|
@ -93,7 +93,7 @@ class IscsiService(CephadmService):
|
||||
logger.warning(
|
||||
'Unable to add iSCSI gateway to the Dashboard for %s: %s', dd, reason)
|
||||
continue
|
||||
host = self.mgr.inventory.get_addr(dd.hostname)
|
||||
host = self._inventory_get_addr(dd.hostname)
|
||||
service_url = 'http://{}:{}@{}:{}'.format(
|
||||
spec.api_user, spec.api_password, host, spec.api_port or '5000')
|
||||
gw = gateways.get(dd.hostname)
|
||||
|
@ -9,6 +9,8 @@ from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class GrafanaService(CephadmService):
|
||||
DEFAULT_SERVICE_PORT = 3000
|
||||
|
||||
def create(self, daemon_id, host):
|
||||
# type: (str, str) -> str
|
||||
return self.mgr._create_daemon('grafana', daemon_id, host)
|
||||
@ -92,12 +94,12 @@ datasources:
|
||||
protocol = https
|
||||
cert_file = /etc/grafana/certs/cert_file
|
||||
cert_key = /etc/grafana/certs/cert_key
|
||||
http_port = 3000
|
||||
http_port = {}
|
||||
[security]
|
||||
admin_user = admin
|
||||
admin_password = admin
|
||||
allow_embedding = true
|
||||
""",
|
||||
""".format(self.DEFAULT_SERVICE_PORT),
|
||||
'provisioning/datasources/ceph-dashboard.yml': generate_grafana_ds_config(prom_services),
|
||||
'certs/cert_file': '# generated by cephadm\n%s' % cert,
|
||||
'certs/cert_key': '# generated by cephadm\n%s' % pkey,
|
||||
@ -105,21 +107,25 @@ datasources:
|
||||
}
|
||||
return config_file, sorted(deps)
|
||||
|
||||
def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
|
||||
# make sure the dashboard [does not] references grafana
|
||||
try:
|
||||
current_url = self.mgr.get_module_option_ex('dashboard', 'GRAFANA_API_URL')
|
||||
host = daemon_descrs[0].hostname
|
||||
url = f'https://{self.mgr.inventory.get_addr(host)}:3000'
|
||||
if current_url != url:
|
||||
logger.info('Setting dashboard grafana config to %s' % url)
|
||||
self.mgr.set_module_option_ex('dashboard', 'GRAFANA_API_URL', url)
|
||||
# FIXME: is it a signed cert??
|
||||
except Exception as e:
|
||||
logger.debug('got exception fetching dashboard grafana state: %s', e)
|
||||
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
|
||||
# Use the least-created one as the active daemon
|
||||
return daemon_descrs[-1]
|
||||
|
||||
def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
|
||||
# TODO: signed cert
|
||||
dd = self.get_active_daemon(daemon_descrs)
|
||||
service_url = 'https://{}:{}'.format(
|
||||
self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
|
||||
self._set_service_url_on_dashboard(
|
||||
'Grafana',
|
||||
'dashboard get-grafana-api-url',
|
||||
'dashboard set-grafana-api-url',
|
||||
service_url
|
||||
)
|
||||
|
||||
class AlertmanagerService(CephadmService):
|
||||
DEFAULT_SERVICE_PORT = 9093
|
||||
|
||||
def create(self, daemon_id, host) -> str:
|
||||
return self.mgr._create_daemon('alertmanager', daemon_id, host)
|
||||
|
||||
@ -185,8 +191,24 @@ receivers:
|
||||
"peers": peers
|
||||
}, sorted(deps)
|
||||
|
||||
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
|
||||
# TODO: if there are multiple daemons, who is the active one?
|
||||
return daemon_descrs[0]
|
||||
|
||||
def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
|
||||
dd = self.get_active_daemon(daemon_descrs)
|
||||
service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
|
||||
self._set_service_url_on_dashboard(
|
||||
'AlertManager',
|
||||
'dashboard get-alertmanager-api-host',
|
||||
'dashboard set-alertmanager-api-host',
|
||||
service_url
|
||||
)
|
||||
|
||||
|
||||
class PrometheusService(CephadmService):
|
||||
DEFAULT_SERVICE_PORT = 9095
|
||||
|
||||
def create(self, daemon_id, host) -> str:
|
||||
return self.mgr._create_daemon('prometheus', daemon_id, host)
|
||||
|
||||
@ -283,6 +305,20 @@ scrape_configs:
|
||||
|
||||
return r, sorted(deps)
|
||||
|
||||
def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
|
||||
# TODO: if there are multiple daemons, who is the active one?
|
||||
return daemon_descrs[0]
|
||||
|
||||
def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
|
||||
dd = self.get_active_daemon(daemon_descrs)
|
||||
service_url = 'http://{}:{}'.format(
|
||||
self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
|
||||
self._set_service_url_on_dashboard(
|
||||
'Prometheus',
|
||||
'dashboard get-prometheus-api-host',
|
||||
'dashboard set-prometheus-api-host',
|
||||
service_url
|
||||
)
|
||||
|
||||
class NodeExporterService(CephadmService):
|
||||
def create(self, daemon_id, host) -> str:
|
||||
|
33
src/pybind/mgr/cephadm/tests/test_services.py
Normal file
33
src/pybind/mgr/cephadm/tests/test_services.py
Normal file
@ -0,0 +1,33 @@
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from cephadm.services.cephadmservice import CephadmService
|
||||
|
||||
|
||||
class FakeMgr:
|
||||
def __init__(self):
|
||||
self.config = ''
|
||||
self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
|
||||
|
||||
def _check_mon_command(self, cmd_dict):
|
||||
prefix = cmd_dict.get('prefix')
|
||||
if prefix == 'get-cmd':
|
||||
return 0, self.config, ''
|
||||
if prefix == 'set-cmd':
|
||||
self.config = cmd_dict.get('value')
|
||||
return 0, 'value set', ''
|
||||
return -1, '', 'error'
|
||||
|
||||
|
||||
class TestCephadmService:
|
||||
def test_set_service_url_on_dashboard(self):
|
||||
# pylint: disable=protected-access
|
||||
mgr = FakeMgr()
|
||||
service_url = 'http://svc:1000'
|
||||
service = CephadmService(mgr)
|
||||
service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
|
||||
assert mgr.config == service_url
|
||||
|
||||
# set-cmd should not be called if value doesn't change
|
||||
mgr.check_mon_command.reset_mock()
|
||||
service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
|
||||
mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})
|
Loading…
Reference in New Issue
Block a user