Merge pull request #35106 from p-se/wip-pse-cephadm-custom-monitoring-images

mgr/cephadm: allow custom images for grafana, prometheus, alertmanager and node_exporter
This commit is contained in:
Sebastian Wagner 2020-05-27 18:12:05 +02:00 committed by GitHub
commit 61847ab200
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 29 deletions

View File

@ -76,6 +76,45 @@ completed, you should see something like this from ``ceph orch ls``::
node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present
prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present
Using custom images
~~~~~~~~~~~~~~~~~~~
It is possible to install or upgrade monitoring components based on other
images. To do so, the name of the image to be used needs to be stored in the
configuration first. The following configuration options are available:
- ``container_image_prometheus``
- ``container_image_grafana``
- ``container_image_alertmanager``
- ``container_image_node_exporter``
Custom images can be set with the ``ceph config`` command::
ceph config set mgr mgr/cephadm/<option_name> <value>
For example::
ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
.. note::
Setting a custom image overrides the default value (but does not
overwrite it). The default value changes when updates become available.
Once you set a custom image for a component, that component will no
longer be updated automatically. You will need to manually update the
configuration (image name and tag) to be able to install updates.
If you choose to go with the recommended default images instead, you can
reset the custom image you set earlier. After that, the default value will
be used again. Use ``ceph config rm`` to reset the configuration option::
ceph config rm mgr mgr/cephadm/<option_name>
For example::
ceph config rm mgr mgr/cephadm/container_image_prometheus
Disabling monitoring
--------------------

View File

@ -484,7 +484,7 @@ def attempt_bind(s, address, port):
def port_in_use(port_num):
# type: (int) -> bool
"""Detect whether a port is in use on the local machine - IPv4 and IPv6"""
logger.info('Verifying port %d ...' % (port_num))
logger.info('Verifying port %d ...' % port_num)
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
attempt_bind(s, '0.0.0.0', port_num)
@ -2689,7 +2689,7 @@ def extract_uid_gid_monitoring(daemon_type):
@default_image
def command_deploy():
# type: () -> None
(daemon_type, daemon_id) = args.name.split('.', 1)
daemon_type, daemon_id = args.name.split('.', 1)
l = FileLock(args.fsid)
l.acquire()
@ -2711,8 +2711,8 @@ def command_deploy():
logger.info('%s daemon %s ...' % ('Deploy', args.name))
if daemon_type in Ceph.daemons:
(config, keyring) = get_config_and_keyring()
(uid, gid) = extract_uid_gid()
config, keyring = get_config_and_keyring()
uid, gid = extract_uid_gid()
make_var_run(args.fsid, uid, gid)
c = get_container(args.fsid, daemon_type, daemon_id,
ptrace=args.allow_ptrace)
@ -2723,8 +2723,6 @@ def command_deploy():
elif daemon_type in Monitoring.components:
# monitoring daemon - prometheus, grafana, alertmanager, node-exporter
monitoring_args = [] # type: List[str]
# Default Checks
if not args.reconfig and not redeploy:
daemon_ports = Monitoring.port_map[daemon_type] # type: List[int]
@ -2744,7 +2742,6 @@ def command_deploy():
raise Error("{} deployment requires config-json which must "
"contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))
uid, gid = extract_uid_gid_monitoring(daemon_type)
c = get_container(args.fsid, daemon_type, daemon_id)
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
@ -2753,17 +2750,17 @@ def command_deploy():
elif daemon_type == NFSGanesha.daemon_type:
if not args.reconfig and not redeploy:
NFSGanesha.port_in_use()
(config, keyring) = get_config_and_keyring()
config, keyring = get_config_and_keyring()
# TODO: extract ganesha uid/gid (997, 994) ?
(uid, gid) = extract_uid_gid()
uid, gid = extract_uid_gid()
c = get_container(args.fsid, daemon_type, daemon_id)
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
config=config, keyring=keyring,
reconfig=args.reconfig)
elif daemon_type == CephIscsi.daemon_type:
(config, keyring) = get_config_and_keyring()
(uid, gid) = extract_uid_gid()
config, keyring = get_config_and_keyring()
uid, gid = extract_uid_gid()
c = get_container(args.fsid, daemon_type, daemon_id)
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
config=config, keyring=keyring,

View File

@ -1,13 +1,12 @@
import json
import errno
import logging
import time
from threading import Event
from functools import wraps
import string
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, Type, \
Any, NamedTuple, Iterator, Set, Sequence, TYPE_CHECKING, cast, Union
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
Any, Set, TYPE_CHECKING, cast
import datetime
import six
@ -186,6 +185,26 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
'desc': 'Container image name, without the tag',
'runtime': True,
},
# Default container images for the monitoring stack. Each option can be
# overridden with `ceph config set mgr mgr/cephadm/<option_name> <value>`.
# The `desc` text must name the component the option actually controls.
{
'name': 'container_image_prometheus',
'default': 'prom/prometheus:v2.18.1',
'desc': 'Prometheus container image',
},
{
'name': 'container_image_grafana',
'default': 'ceph/ceph-grafana:latest',
'desc': 'Grafana container image',
},
{
'name': 'container_image_alertmanager',
'default': 'prom/alertmanager:v0.20.0',
'desc': 'Alertmanager container image',
},
{
'name': 'container_image_node_exporter',
'default': 'prom/node-exporter:v0.18.1',
'desc': 'Node exporter container image',
},
{
'name': 'warn_on_stray_hosts',
'type': 'bool',
@ -251,6 +270,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
self.host_check_interval = 0
self.mode = ''
self.container_image_base = ''
self.container_image_prometheus = ''
self.container_image_grafana = ''
self.container_image_alertmanager = ''
self.container_image_node_exporter = ''
self.warn_on_stray_hosts = True
self.warn_on_stray_daemons = True
self.warn_on_failed_host_check = True
@ -873,14 +896,18 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
executable_path))
return executable_path
def _run_cephadm(self, host, entity, command, args,
addr=None,
stdin=None,
def _run_cephadm(self,
host: str,
entity: Optional[str],
command: str,
args: List[str],
addr: Optional[str] = None,
stdin: Optional[str] = None,
no_fsid=False,
error_ok=False,
image=None,
env_vars=None):
# type: (str, Optional[str], str, List[str], Optional[str], Optional[str], bool, bool, Optional[str], Optional[List[str]]) -> Tuple[List[str], List[str], int]
image: Optional[str] = None,
env_vars: Optional[List[str]] = None,
) -> Tuple[List[str], List[str], int]:
"""
Run cephadm on the remote host with the given command + args
@ -902,7 +929,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
assert image or entity
if not image:
daemon_type = entity.split('.', 1)[0] # type: ignore
daemon_type = entity.split('.', 1)[0] # type: ignore
if daemon_type in CEPH_TYPES or \
daemon_type == 'nfs' or \
daemon_type == 'iscsi':
@ -912,7 +939,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
'who': utils.name_to_config_section(entity),
'key': 'container_image',
})
image = image.strip() # type: ignore
image = image.strip() # type: ignore
elif daemon_type == 'prometheus':
image = self.container_image_prometheus
elif daemon_type == 'grafana':
image = self.container_image_grafana
elif daemon_type == 'alertmanager':
image = self.container_image_alertmanager
elif daemon_type == 'node-exporter':
image = self.container_image_node_exporter
self.log.debug('%s container image %s' % (entity, image))
final_args = []
@ -929,8 +965,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
final_args += ['--fsid', self._cluster_fsid]
final_args += args
self.log.debug('args: %s' % (' '.join(final_args)))
if self.mode == 'root':
self.log.debug('args: %s' % (' '.join(final_args)))
if stdin:
self.log.debug('stdin: %s' % stdin)
script = 'injected_argv = ' + json.dumps(final_args) + '\n'
@ -1551,11 +1587,18 @@ you may want to run:
'keyring': keyring,
}
def _create_daemon(self, daemon_type, daemon_id, host,
keyring=None,
extra_args=None, extra_config=None,
def _create_daemon(self,
daemon_type: str,
daemon_id: str,
host: str,
keyring: Optional[str] = None,
extra_args: Optional[List[str]] = None,
extra_config: Optional[Dict[str, Any]] = None,
reconfig=False,
osd_uuid_map=None) -> str:
osd_uuid_map: Optional[Dict[str, Any]] = None,
redeploy=False,
) -> str:
if not extra_args:
extra_args = []
if not extra_config:
@ -1564,7 +1607,7 @@ you may want to run:
start_time = datetime.datetime.utcnow()
deps = [] # type: List[str]
cephadm_config = {} # type: Dict[str, Any]
cephadm_config = {} # type: Dict[str, Any]
if daemon_type == 'prometheus':
cephadm_config, deps = self.prometheus_service.generate_config()
extra_args.extend(['--config-json', '-'])
@ -1597,7 +1640,7 @@ you may want to run:
osd_uuid_map = self.get_osd_uuid_map()
osd_uuid = osd_uuid_map.get(daemon_id)
if not osd_uuid:
raise OrchestratorError('osd.%d not in osdmap' % daemon_id)
raise OrchestratorError('osd.%s not in osdmap' % daemon_id)
extra_args.extend(['--osd-fsid', osd_uuid])
if reconfig: