mirror of
https://github.com/ceph/ceph
synced 2025-01-18 09:02:08 +00:00
Merge pull request #35106 from p-se/wip-pse-cephadm-custom-monitoring-images
mgr/cephadm: allow custom images for grafana, prometheus, alertmanager and node_exporter
This commit is contained in:
commit
61847ab200
@ -76,6 +76,45 @@ completed, you should see something like this from ``ceph orch ls``::
|
||||
node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present
|
||||
prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present
|
||||
|
||||
Using custom images
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is possible to install or upgrade monitoring components based on other
|
||||
images. To do so, the name of the image to be used needs to be stored in the
|
||||
configuration first. The following configuration options are available.
|
||||
|
||||
- ``container_image_prometheus``
|
||||
- ``container_image_grafana``
|
||||
- ``container_image_alertmanager``
|
||||
- ``container_image_node_exporter``
|
||||
|
||||
Custom images can be set with the ``ceph config`` command::
|
||||
|
||||
ceph config set mgr mgr/cephadm/<option_name> <value>
|
||||
|
||||
For example::
|
||||
|
||||
ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
|
||||
|
||||
.. note::
|
||||
|
||||
Setting a custom image overrides the default value (but does not
|
||||
overwrite it). The default value changes when updates become available.
|
||||
Once a custom image is set, the component it applies to can no longer
|
||||
be updated automatically. You will need to
|
||||
manually update the configuration (image name and tag) to be able to
|
||||
install updates.
|
||||
|
||||
If you would rather use the recommended default image again, you can remove
|
||||
the custom image you have set. After that, the default value will be
|
||||
used again. Use ``ceph config rm`` to reset the configuration option::
|
||||
|
||||
ceph config rm mgr mgr/cephadm/<option_name>
|
||||
|
||||
For example::
|
||||
|
||||
ceph config rm mgr mgr/cephadm/container_image_prometheus
|
||||
|
||||
Disabling monitoring
|
||||
--------------------
|
||||
|
||||
|
@ -484,7 +484,7 @@ def attempt_bind(s, address, port):
|
||||
def port_in_use(port_num):
|
||||
# type: (int) -> bool
|
||||
"""Detect whether a port is in use on the local machine - IPv4 and IPv6"""
|
||||
logger.info('Verifying port %d ...' % (port_num))
|
||||
logger.info('Verifying port %d ...' % port_num)
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
attempt_bind(s, '0.0.0.0', port_num)
|
||||
@ -2689,7 +2689,7 @@ def extract_uid_gid_monitoring(daemon_type):
|
||||
@default_image
|
||||
def command_deploy():
|
||||
# type: () -> None
|
||||
(daemon_type, daemon_id) = args.name.split('.', 1)
|
||||
daemon_type, daemon_id = args.name.split('.', 1)
|
||||
|
||||
l = FileLock(args.fsid)
|
||||
l.acquire()
|
||||
@ -2711,8 +2711,8 @@ def command_deploy():
|
||||
logger.info('%s daemon %s ...' % ('Deploy', args.name))
|
||||
|
||||
if daemon_type in Ceph.daemons:
|
||||
(config, keyring) = get_config_and_keyring()
|
||||
(uid, gid) = extract_uid_gid()
|
||||
config, keyring = get_config_and_keyring()
|
||||
uid, gid = extract_uid_gid()
|
||||
make_var_run(args.fsid, uid, gid)
|
||||
c = get_container(args.fsid, daemon_type, daemon_id,
|
||||
ptrace=args.allow_ptrace)
|
||||
@ -2723,8 +2723,6 @@ def command_deploy():
|
||||
|
||||
elif daemon_type in Monitoring.components:
|
||||
# monitoring daemon - prometheus, grafana, alertmanager, node-exporter
|
||||
monitoring_args = [] # type: List[str]
|
||||
|
||||
# Default Checks
|
||||
if not args.reconfig and not redeploy:
|
||||
daemon_ports = Monitoring.port_map[daemon_type] # type: List[int]
|
||||
@ -2744,7 +2742,6 @@ def command_deploy():
|
||||
raise Error("{} deployment requires config-json which must "
|
||||
"contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))
|
||||
|
||||
|
||||
uid, gid = extract_uid_gid_monitoring(daemon_type)
|
||||
c = get_container(args.fsid, daemon_type, daemon_id)
|
||||
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
|
||||
@ -2753,17 +2750,17 @@ def command_deploy():
|
||||
elif daemon_type == NFSGanesha.daemon_type:
|
||||
if not args.reconfig and not redeploy:
|
||||
NFSGanesha.port_in_use()
|
||||
(config, keyring) = get_config_and_keyring()
|
||||
config, keyring = get_config_and_keyring()
|
||||
# TODO: extract ganesha uid/gid (997, 994) ?
|
||||
(uid, gid) = extract_uid_gid()
|
||||
uid, gid = extract_uid_gid()
|
||||
c = get_container(args.fsid, daemon_type, daemon_id)
|
||||
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
|
||||
config=config, keyring=keyring,
|
||||
reconfig=args.reconfig)
|
||||
|
||||
elif daemon_type == CephIscsi.daemon_type:
|
||||
(config, keyring) = get_config_and_keyring()
|
||||
(uid, gid) = extract_uid_gid()
|
||||
config, keyring = get_config_and_keyring()
|
||||
uid, gid = extract_uid_gid()
|
||||
c = get_container(args.fsid, daemon_type, daemon_id)
|
||||
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
|
||||
config=config, keyring=keyring,
|
||||
|
@ -1,13 +1,12 @@
|
||||
import json
|
||||
import errno
|
||||
import logging
|
||||
import time
|
||||
from threading import Event
|
||||
from functools import wraps
|
||||
|
||||
import string
|
||||
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, Type, \
|
||||
Any, NamedTuple, Iterator, Set, Sequence, TYPE_CHECKING, cast, Union
|
||||
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
|
||||
Any, Set, TYPE_CHECKING, cast
|
||||
|
||||
import datetime
|
||||
import six
|
||||
@ -186,6 +185,26 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
'desc': 'Container image name, without the tag',
|
||||
'runtime': True,
|
||||
},
|
||||
{
|
||||
'name': 'container_image_prometheus',
|
||||
'default': 'prom/prometheus:v2.18.1',
|
||||
'desc': 'Prometheus container image',
|
||||
},
|
||||
{
|
||||
'name': 'container_image_grafana',
|
||||
'default': 'ceph/ceph-grafana:latest',
|
||||
'desc': 'Prometheus container image',
|
||||
},
|
||||
{
|
||||
'name': 'container_image_alertmanager',
|
||||
'default': 'prom/alertmanager:v0.20.0',
|
||||
'desc': 'Prometheus container image',
|
||||
},
|
||||
{
|
||||
'name': 'container_image_node_exporter',
|
||||
'default': 'prom/node-exporter:v0.18.1',
|
||||
'desc': 'Prometheus container image',
|
||||
},
|
||||
{
|
||||
'name': 'warn_on_stray_hosts',
|
||||
'type': 'bool',
|
||||
@ -251,6 +270,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
self.host_check_interval = 0
|
||||
self.mode = ''
|
||||
self.container_image_base = ''
|
||||
self.container_image_prometheus = ''
|
||||
self.container_image_grafana = ''
|
||||
self.container_image_alertmanager = ''
|
||||
self.container_image_node_exporter = ''
|
||||
self.warn_on_stray_hosts = True
|
||||
self.warn_on_stray_daemons = True
|
||||
self.warn_on_failed_host_check = True
|
||||
@ -873,14 +896,18 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
executable_path))
|
||||
return executable_path
|
||||
|
||||
def _run_cephadm(self, host, entity, command, args,
|
||||
addr=None,
|
||||
stdin=None,
|
||||
def _run_cephadm(self,
|
||||
host: str,
|
||||
entity: Optional[str],
|
||||
command: str,
|
||||
args: List[str],
|
||||
addr: Optional[str] = None,
|
||||
stdin: Optional[str] = None,
|
||||
no_fsid=False,
|
||||
error_ok=False,
|
||||
image=None,
|
||||
env_vars=None):
|
||||
# type: (str, Optional[str], str, List[str], Optional[str], Optional[str], bool, bool, Optional[str], Optional[List[str]]) -> Tuple[List[str], List[str], int]
|
||||
image: Optional[str] = None,
|
||||
env_vars: Optional[List[str]] = None,
|
||||
) -> Tuple[List[str], List[str], int]:
|
||||
"""
|
||||
Run cephadm on the remote host with the given command + args
|
||||
|
||||
@ -902,7 +929,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
|
||||
assert image or entity
|
||||
if not image:
|
||||
daemon_type = entity.split('.', 1)[0] # type: ignore
|
||||
daemon_type = entity.split('.', 1)[0] # type: ignore
|
||||
if daemon_type in CEPH_TYPES or \
|
||||
daemon_type == 'nfs' or \
|
||||
daemon_type == 'iscsi':
|
||||
@ -912,7 +939,16 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
'who': utils.name_to_config_section(entity),
|
||||
'key': 'container_image',
|
||||
})
|
||||
image = image.strip() # type: ignore
|
||||
image = image.strip() # type: ignore
|
||||
elif daemon_type == 'prometheus':
|
||||
image = self.container_image_prometheus
|
||||
elif daemon_type == 'grafana':
|
||||
image = self.container_image_grafana
|
||||
elif daemon_type == 'alertmanager':
|
||||
image = self.container_image_alertmanager
|
||||
elif daemon_type == 'node-exporter':
|
||||
image = self.container_image_node_exporter
|
||||
|
||||
self.log.debug('%s container image %s' % (entity, image))
|
||||
|
||||
final_args = []
|
||||
@ -929,8 +965,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
|
||||
final_args += ['--fsid', self._cluster_fsid]
|
||||
final_args += args
|
||||
|
||||
self.log.debug('args: %s' % (' '.join(final_args)))
|
||||
if self.mode == 'root':
|
||||
self.log.debug('args: %s' % (' '.join(final_args)))
|
||||
if stdin:
|
||||
self.log.debug('stdin: %s' % stdin)
|
||||
script = 'injected_argv = ' + json.dumps(final_args) + '\n'
|
||||
@ -1551,11 +1587,18 @@ you may want to run:
|
||||
'keyring': keyring,
|
||||
}
|
||||
|
||||
def _create_daemon(self, daemon_type, daemon_id, host,
|
||||
keyring=None,
|
||||
extra_args=None, extra_config=None,
|
||||
def _create_daemon(self,
|
||||
daemon_type: str,
|
||||
daemon_id: str,
|
||||
host: str,
|
||||
keyring: Optional[str] = None,
|
||||
extra_args: Optional[List[str]] = None,
|
||||
extra_config: Optional[Dict[str, Any]] = None,
|
||||
reconfig=False,
|
||||
osd_uuid_map=None) -> str:
|
||||
osd_uuid_map: Optional[Dict[str, Any]] = None,
|
||||
redeploy=False,
|
||||
) -> str:
|
||||
|
||||
if not extra_args:
|
||||
extra_args = []
|
||||
if not extra_config:
|
||||
@ -1564,7 +1607,7 @@ you may want to run:
|
||||
|
||||
start_time = datetime.datetime.utcnow()
|
||||
deps = [] # type: List[str]
|
||||
cephadm_config = {} # type: Dict[str, Any]
|
||||
cephadm_config = {} # type: Dict[str, Any]
|
||||
if daemon_type == 'prometheus':
|
||||
cephadm_config, deps = self.prometheus_service.generate_config()
|
||||
extra_args.extend(['--config-json', '-'])
|
||||
@ -1597,7 +1640,7 @@ you may want to run:
|
||||
osd_uuid_map = self.get_osd_uuid_map()
|
||||
osd_uuid = osd_uuid_map.get(daemon_id)
|
||||
if not osd_uuid:
|
||||
raise OrchestratorError('osd.%d not in osdmap' % daemon_id)
|
||||
raise OrchestratorError('osd.%s not in osdmap' % daemon_id)
|
||||
extra_args.extend(['--osd-fsid', osd_uuid])
|
||||
|
||||
if reconfig:
|
||||
|
Loading…
Reference in New Issue
Block a user