From 43380f8d98168bb3b2770275fb47084606e3be3d Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Fri, 30 Jun 2017 16:49:46 +0200 Subject: [PATCH 1/3] mgr: set/get_localized_config in MgrModule get_localized_config was getting redundant as it was copied to various modules. This commit also introduces set_localized_config() which set a localized configuration option for a module. Signed-off-by: Wido den Hollander --- src/pybind/mgr/dashboard/module.py | 6 ------ src/pybind/mgr/mgr_module.py | 25 +++++++++++++++++++++++++ src/pybind/mgr/restful/module.py | 6 ------ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 362faddad11..02a1a75bd2a 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -103,12 +103,6 @@ class Module(MgrModule): return self._rados - def get_localized_config(self, key): - r = self.get_config(self.get_mgr_id() + '/' + key) - if r is None: - r = self.get_config(key) - return r - def update_pool_stats(self): df = global_instance().get("df") pool_stats = dict([(p['id'], p['stats']) for p in df['pools']]) diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index e76aa04be8a..cf7fff949d7 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -191,6 +191,21 @@ class MgrModule(object): """ return ceph_state.get_config_prefix(self._handle, key_prefix) + def get_localized_config(self, key, default=None): + """ + Retrieve localized configuration for this ceph-mgr instance + :param key: str + :param default: str + :return: str + """ + r = self.get_config(self.get_mgr_id() + '/' + key) + if r is None: + r = self.get_config(key) + + if r is None: + r = default + return r + def set_config(self, key, val): """ Set the value of a persistent configuration setting @@ -200,6 +215,15 @@ class MgrModule(object): """ ceph_state.set_config(self._handle, key, val) + def 
set_localized_config(self, key, val): + """ + Set localized configuration for this ceph-mgr instance + :param key: str + :param val: str + :return: str + """ + return self.set_config(self.get_mgr_id() + '/' + key, val) + def set_config_json(self, key, val): """ Helper for setting json-serialized-config @@ -221,3 +245,4 @@ class MgrModule(object): return None else: return json.loads(raw) + diff --git a/src/pybind/mgr/restful/module.py b/src/pybind/mgr/restful/module.py index 508bb7f5274..a2fef7a6898 100644 --- a/src/pybind/mgr/restful/module.py +++ b/src/pybind/mgr/restful/module.py @@ -254,12 +254,6 @@ class Module(MgrModule): self.serve_event.wait() self.serve_event.clear() - def get_localized_config(self, key): - r = self.get_config(self.get_mgr_id() + '/' + key) - if r is None: - r = self.get_config(key) - return r - def refresh_keys(self): self.keys = {} rawkeys = self.get_config_prefix('keys/') or {} From 270a3e0f210f9bb84ed9c7ec628bf8362d6f2e88 Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Mon, 3 Jul 2017 14:48:06 +0200 Subject: [PATCH 2/3] mgr: Implement self_test() method in MgrModule Other modules are encouraged to override this method and implement an as-good-as-possible self-test which returns True or False. It should be a simple way of running (automated) testing on a ceph-mgr module Signed-off-by: Wido den Hollander --- src/pybind/mgr/mgr_module.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index cf7fff949d7..2e4ba6b142c 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -246,3 +246,10 @@ class MgrModule(object): else: return json.loads(raw) + def self_test(self): + """ + Run a self-test on the module. 
Override this function and implement + the best possible self-test for (automated) testing of the module + :return: bool + """ + pass From bcf3a7edcbeb82e4af2c31cffe542c64aa0a91ec Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Tue, 27 Jun 2017 15:49:03 +0200 Subject: [PATCH 3/3] mgr: Zabbix monitoring module This ceph-mgr module will pull various values from the Ceph cluster and send them to a Zabbix Server using zabbix_sender. This requires the zabbix_sender executable to be present on the system running ceph-mgr as it will be invoked to send data to Zabbix. A Zabbix template can be found in this directory which can be used to easily get data from your Ceph cluster into Zabbix. More information is available in the README file found in the module's directory. Signed-off-by: Wido den Hollander --- doc/mgr/index.rst | 1 + doc/mgr/zabbix.rst | 104 ++ src/pybind/mgr/zabbix/__init__.py | 1 + src/pybind/mgr/zabbix/module.py | 277 ++++ src/pybind/mgr/zabbix/zabbix_template.xml | 1707 +++++++++++++++++++++ 5 files changed, 2090 insertions(+) create mode 100644 doc/mgr/zabbix.rst create mode 100644 src/pybind/mgr/zabbix/__init__.py create mode 100644 src/pybind/mgr/zabbix/module.py create mode 100644 src/pybind/mgr/zabbix/zabbix_template.xml diff --git a/doc/mgr/index.rst b/doc/mgr/index.rst index 38bdec15396..28280d0625c 100644 --- a/doc/mgr/index.rst +++ b/doc/mgr/index.rst @@ -28,5 +28,6 @@ sensible. Installation and Configuration Dashboard RESTful + Zabbix Writing plugins diff --git a/doc/mgr/zabbix.rst b/doc/mgr/zabbix.rst new file mode 100644 index 00000000000..03abdd6ef98 --- /dev/null +++ b/doc/mgr/zabbix.rst @@ -0,0 +1,104 @@ +Zabbix plugin +============= + +The Zabbix plugin actively sends information to a Zabbix server like: + +- Ceph status +- I/O operations +- I/O bandwidth +- OSD status +- Storage utilization + +Requirements +============ + +The plugin requires that the *zabbix_sender* executable is present on *all* +machines running ceph-mgr. 
It can be installed on most distributions using +the package manager. + +Dependencies +------------ +Installing zabbix_sender can be done under Ubuntu or Fedora using either apt +or dnf. + +On Ubuntu Xenial: + +:: + + apt install zabbix-agent + +On Fedora: + +:: + + dnf install zabbix-sender + + +Enabling +======== + +Add this to your ceph.conf on nodes where you run ceph-mgr: + +:: + + [mgr] + mgr modules = zabbix + +If you use any other ceph-mgr modules, make sure they're in the list too. + +Restart the ceph-mgr daemon after modifying the setting to load the module. + + +Configuration +============= + +Two configuration keys are mandatory for the module to work: + +- mgr/zabbix/zabbix_host +- mgr/zabbix/identifier + +The parameter *zabbix_host* controls the hostname of the Zabbix server to which +*zabbix_sender* will send the items. This can be an IP address if required by +your installation. + +The *identifier* parameter controls the identifier/hostname to use as source +when sending items to Zabbix. This should match the name of the *Host* in +your Zabbix server. + +Additional configuration keys which can be configured and their default values: + +- mgr/zabbix/zabbix_port: 10051 +- mgr/zabbix/zabbix_sender: /usr/bin/zabbix_sender +- mgr/zabbix/interval: 60 + +Configuration keys +------------------- + +Configuration keys can be set on any machine with the proper cephx credentials; +these are usually Monitors where the *client.admin* key is present. + +:: + + ceph config-key put <key> <value> + +For example: + +:: + + ceph config-key put mgr/zabbix/zabbix_host zabbix.localdomain + ceph config-key put mgr/zabbix/identifier ceph.eu-ams02.local + +Debugging +========= + +Should you want to debug the Zabbix module, increase the logging level for +ceph-mgr and check the logs. + +:: + + [mgr] + debug mgr = 20 + +With logging set to debug for the manager the plugin will print various logging +lines prefixed with *mgr[zabbix]* for easy filtering. 
+ diff --git a/src/pybind/mgr/zabbix/__init__.py b/src/pybind/mgr/zabbix/__init__.py new file mode 100644 index 00000000000..0440e0705fb --- /dev/null +++ b/src/pybind/mgr/zabbix/__init__.py @@ -0,0 +1 @@ +from module import * # NOQA diff --git a/src/pybind/mgr/zabbix/module.py b/src/pybind/mgr/zabbix/module.py new file mode 100644 index 00000000000..9efb74c2ee0 --- /dev/null +++ b/src/pybind/mgr/zabbix/module.py @@ -0,0 +1,277 @@ +""" +Zabbix module for ceph-mgr + +Collect statistics from Ceph cluster and every X seconds send data to a Zabbix +server using the zabbix_sender executable. +""" +import json +import errno +from subprocess import Popen, PIPE +from threading import Event +from mgr_module import MgrModule + + +def avg(data): + return sum(data) / float(len(data)) + + +class ZabbixSender(object): + def __init__(self, sender, host, port, log): + self.sender = sender + self.host = host + self.port = port + self.log = log + + def send(self, hostname, data): + if len(data) == 0: + return + + cmd = [self.sender, '-z', self.host, '-p', str(self.port), '-s', + hostname, '-vv', '-i', '-'] + + proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) + + for key, value in data.items(): + proc.stdin.write('{0} ceph.{1} {2}\n'.format(hostname, key, value)) + + stdout, stderr = proc.communicate() + if proc.returncode != 0: + raise RuntimeError('%s exited non-zero: %s' % (self.sender, + stderr)) + + self.log.debug('Zabbix Sender: %s', stdout.rstrip()) + + +class Module(MgrModule): + run = False + config = dict() + ceph_health_mapping = {'HEALTH_OK': 0, 'HEALTH_WARN': 1, 'HEALTH_ERR': 2} + + config_keys = { + 'zabbix_sender': '/usr/bin/zabbix_sender', + 'zabbix_host': None, + 'zabbix_port': 10051, + 'identifier': None, 'interval': 60 + } + + COMMANDS = [ + { + "cmd": "zabbix config-set name=key,type=CephString " + "name=value,type=CephString", + "desc": "Set a configuration value", + "perm": "rw" + }, + { + "cmd": "zabbix config-show", + "desc": "Show current 
configuration", + "perm": "r" + }, + { + "cmd": "zabbix send", + "desc": "Force sending data to Zabbux", + "perm": "rw" + }, + { + "cmd": "zabbix self-test", + "desc": "Run a self-test on the Zabbix module", + "perm": "r" + } + ] + + def __init__(self, *args, **kwargs): + super(Module, self).__init__(*args, **kwargs) + self.event = Event() + + def init_module_config(self): + for key, default in self.config_keys.items(): + value = self.get_localized_config(key, default) + if value is None: + raise RuntimeError('Configuration key {0} not set; "ceph ' + 'config-key put mgr/zabbix/{0} ' + '"'.format(key)) + + self.set_config_option(key, value) + + def set_config_option(self, option, value): + if option not in self.config_keys.keys(): + raise RuntimeError('{0} is a unknown configuration ' + 'option'.format(option)) + + if option in ['zabbix_port', 'interval']: + try: + value = int(value) + except (ValueError, TypeError): + raise RuntimeError('invalid {0} configured. Please specify ' + 'a valid integer'.format(option)) + + if option == 'interval' and value < 10: + raise RuntimeError('interval should be set to at least 10 seconds') + + self.config[option] = value + + def get_data(self): + data = dict() + + health = json.loads(self.get('health')['json']) + data['overall_status'] = health['overall_status'] + data['overall_status_int'] = \ + self.ceph_health_mapping.get(data['overall_status']) + + mon_status = json.loads(self.get('mon_status')['json']) + data['num_mon'] = len(mon_status['monmap']['mons']) + + df = self.get('df') + data['num_pools'] = len(df['pools']) + data['total_objects'] = df['stats']['total_objects'] + data['total_used_bytes'] = df['stats']['total_used_bytes'] + data['total_bytes'] = df['stats']['total_bytes'] + data['total_avail_bytes'] = df['stats']['total_avail_bytes'] + + wr_ops = 0 + rd_ops = 0 + wr_bytes = 0 + rd_bytes = 0 + + for pool in df['pools']: + wr_ops += pool['stats']['wr'] + rd_ops += pool['stats']['rd'] + wr_bytes += 
pool['stats']['wr_bytes'] + rd_bytes += pool['stats']['rd_bytes'] + + data['wr_ops'] = wr_ops + data['rd_ops'] = rd_ops + data['wr_bytes'] = wr_bytes + data['rd_bytes'] = rd_bytes + + osd_map = self.get('osd_map') + data['num_osd'] = len(osd_map['osds']) + data['osd_nearfull_ratio'] = osd_map['nearfull_ratio'] + data['osd_full_ratio'] = osd_map['full_ratio'] + data['osd_backfillfull_ratio'] = osd_map['backfillfull_ratio'] + + data['num_pg_temp'] = len(osd_map['pg_temp']) + + num_up = 0 + num_in = 0 + for osd in osd_map['osds']: + if osd['up'] == 1: + num_up += 1 + + if osd['in'] == 1: + num_in += 1 + + data['num_osd_up'] = num_up + data['num_osd_in'] = num_in + + osd_fill = list() + osd_apply_latency = list() + osd_commit_latency = list() + + osd_stats = self.get('osd_stats') + for osd in osd_stats['osd_stats']: + osd_fill.append((float(osd['kb_used']) / float(osd['kb'])) * 100) + osd_apply_latency.append(osd['perf_stat']['apply_latency_ms']) + osd_commit_latency.append(osd['perf_stat']['commit_latency_ms']) + + try: + data['osd_max_fill'] = max(osd_fill) + data['osd_min_fill'] = min(osd_fill) + data['osd_avg_fill'] = avg(osd_fill) + except ValueError: + pass + + try: + data['osd_latency_apply_max'] = max(osd_apply_latency) + data['osd_latency_apply_min'] = min(osd_apply_latency) + data['osd_latency_apply_avg'] = avg(osd_apply_latency) + + data['osd_latency_commit_max'] = max(osd_commit_latency) + data['osd_latency_commit_min'] = min(osd_commit_latency) + data['osd_latency_commit_avg'] = avg(osd_commit_latency) + except ValueError: + pass + + pg_summary = self.get('pg_summary') + num_pg = 0 + for state, num in pg_summary['all'].items(): + num_pg += num + + data['num_pg'] = num_pg + + return data + + def send(self): + data = self.get_data() + + self.log.debug('Sending data to Zabbix server %s', + self.config['zabbix_host']) + self.log.debug(data) + + try: + zabbix = ZabbixSender(self.config['zabbix_sender'], + self.config['zabbix_host'], + 
self.config['zabbix_port'], self.log) + zabbix.send(self.config['identifier'], data) + except Exception as exc: + self.log.error('Exception when sending: %s', exc) + + def handle_command(self, command): + if command['prefix'] == 'zabbix config-show': + return 0, json.dumps(self.config), '' + elif command['prefix'] == 'zabbix config-set': + key = command['key'] + value = command['value'] + if not value: + return -errno.EINVAL, '', 'Value should not be empty or None' + + self.log.debug('Setting configuration option %s to %s', key, value) + self.set_config_option(key, value) + self.set_localized_config(key, value) + return 0, 'Configuration option {0} updated'.format(key), '' + elif command['prefix'] == 'zabbix send': + self.send() + return 0, 'Sending data to Zabbix', '' + elif command['prefix'] == 'zabbix self-test': + self.self_test() + return 0, 'Self-test succeeded', '' + else: + return (-errno.EINVAL, '', + "Command not found '{0}'".format(command['prefix'])) + + def shutdown(self): + self.log.info('Stopping zabbix') + self.run = False + self.event.set() + + def serve(self): + self.log.debug('Zabbix module starting up') + self.run = True + + self.init_module_config() + + for key, value in self.config.items(): + self.log.debug('%s: %s', key, value) + + while self.run: + self.log.debug('Waking up for new iteration') + + # Sometimes fetching data fails, should be fixed by PR #16020 + try: + self.send() + except Exception as exc: + self.log.error(exc) + + interval = self.config['interval'] + self.log.debug('Sleeping for %d seconds', interval) + self.event.wait(interval) + + def self_test(self): + data = self.get_data() + + if data['overall_status'] not in self.ceph_health_mapping: + raise RuntimeError('No valid overall_status found in data') + + int(data['overall_status_int']) + + if data['num_mon'] < 1: + raise RuntimeError('num_mon is smaller than 1') diff --git a/src/pybind/mgr/zabbix/zabbix_template.xml b/src/pybind/mgr/zabbix/zabbix_template.xml new file mode 
100644 index 00000000000..ecd1ef438a4 --- /dev/null +++ b/src/pybind/mgr/zabbix/zabbix_template.xml @@ -0,0 +1,1707 @@ + + + 3.0 + 2017-07-05T09:03:49Z + + + Templates + + + + + + + + {ceph-mgr Zabbix module:ceph.overall_status_int.last()}=2 + Ceph cluster in ERR state + + 0 + 5 + Ceph cluster is in ERR state + 0 + + + + {ceph-mgr Zabbix module:ceph.overall_status_int.avg(1h)}=1 + Ceph cluster in WARN state + + 0 + 4 + Issue a trigger if Ceph cluster is in WARN state for >1h + 0 + + + + {ceph-mgr Zabbix module:ceph.num_osd_in.change()}>0 + Number of IN OSDs decreased + + 0 + 2 + Amount of OSDs in IN state decreased + 0 + + + + {ceph-mgr Zabbix module:ceph.num_osd_up.change()}>0 + Number of UP OSDs decreased + + 0 + 2 + Amount of OSDs in UP state decreased + 0 + + + + + + Ceph bandwidth + 900 + 200 + 0.0000 + 100.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 0 + 0 + 0 + 0 + + + 0 + 0 + 1A7C11 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.rd_bytes + + + + 1 + 0 + F63100 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.wr_bytes + + + + + + Ceph free space + 900 + 200 + 0.0000 + 100.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 1 + 2 + 0 + + ceph-mgr Zabbix module + ceph.total_bytes + + + + 0 + 0 + 2774A4 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.total_avail_bytes + + + + + + Ceph health + 900 + 200 + 0.0000 + 2.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 1 + 1 + 0 + 0 + + + 0 + 0 + 1A7C11 + 0 + 7 + 0 + + ceph-mgr Zabbix module + ceph.overall_status_int + + + + + + Ceph I/O + 900 + 200 + 0.0000 + 100.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 0 + 0 + 0 + 0 + + + 0 + 0 + 1A7C11 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.rd_ops + + + + 1 + 0 + F63100 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.wr_ops + + + + + + Ceph OSD latency + 900 + 200 + 0.0000 + 100.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 0 + 0 + 0 + 0 + + + 0 + 0 + 1A7C11 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_latency_apply_avg + + + + 1 + 0 + F63100 + 0 + 2 + 0 + + ceph-mgr Zabbix module + 
ceph.osd_latency_commit_avg + + + + 2 + 0 + 2774A4 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_latency_apply_max + + + + 3 + 0 + A54F10 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_latency_commit_max + + + + 4 + 0 + FC6EA3 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_latency_apply_min + + + + 5 + 0 + 6C59DC + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_latency_commit_min + + + + + + Ceph OSD utilization + 900 + 200 + 0.0000 + 100.0000 + 1 + 1 + 0 + 1 + 0 + 0.0000 + 0.0000 + 1 + 1 + 0 + 0 + + + 0 + 0 + 0000CC + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_nearfull_ratio + + + + 1 + 0 + F63100 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_full_ratio + + + + 2 + 0 + CC00CC + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_backfillfull_ratio + + + + 3 + 0 + A54F10 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_max_fill + + + + 4 + 0 + FC6EA3 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_avg_fill + + + + 5 + 0 + 6C59DC + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.osd_min_fill + + + + + + Ceph storage overview + 900 + 200 + 0.0000 + 0.0000 + 0 + 0 + 2 + 1 + 0 + 0.0000 + 0.0000 + 0 + 0 + 0 + 0 + + + 0 + 0 + F63100 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.total_used_bytes + + + + 1 + 0 + 00CC00 + 0 + 2 + 0 + + ceph-mgr Zabbix module + ceph.total_avail_bytes + + + + + +