From 2c025710400b7d15b2b5dd4d7845423ec0caf823 Mon Sep 17 00:00:00 2001 From: Matt Bostock Date: Tue, 4 Oct 2016 09:38:00 +0100 Subject: [PATCH] Add StorCli text collector example script Collect metrics from the StorCLI utility on the health of MegaRAID hardware RAID controllers and write them to stdout so that they can be used by the textfile collector. We parse the JSON output that StorCLI provides. Script must be run as root or with appropriate capabilities for storcli to access the RAID card. Designed to run under Python 2.7, using the system Python provided with many Linux distributions. The metrics look like this: mbostock@host:~$ sudo ./storcli.py megaraid_status_code 0 megaraid_controllers_count 1 megaraid_emergency_hot_spare{controller="0"} 1 megaraid_scheduled_patrol_read{controller="0"} 1 megaraid_virtual_drives{controller="0"} 1 megaraid_drive_groups{controller="0"} 1 megaraid_virtual_drives_optimal{controller="0"} 1 megaraid_degraded{controller="0"} 0 megaraid_battery_backup_healthy{controller="0"} 1 megaraid_ports{controller="0"} 8 megaraid_failed{controller="0"} 0 megaraid_drive_groups_optimal{controller="0"} 1 megaraid_healthy{controller="0"} 1 megaraid_physical_drives{controller="0"} 24 megaraid_controller_info{controller="0", model="AVAGOMegaRAIDSASPCIExpressROMB"} 1 mbostock@host:~$ --- text_collector_examples/storcli.py | 101 +++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 text_collector_examples/storcli.py diff --git a/text_collector_examples/storcli.py b/text_collector_examples/storcli.py new file mode 100755 index 00000000..f1a8a602 --- /dev/null +++ b/text_collector_examples/storcli.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + +# Script to parse StorCLI's JSON output and expose +# MegaRAID health as Prometheus metrics. +# +# Tested against StorCLI 'Ver 1.14.12 Nov 25, 2014'. 
#
# StorCLI reference manual:
# http://docs.avagotech.com/docs/12352476
#
# Advanced Software Options (ASO) not exposed as metrics currently.
#
# JSON key abbreviations used by StorCLI are documented in the standard command
# output, i.e. when you omit the trailing 'J' from the command.

import argparse
import json
import subprocess

DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as
    Prometheus metrics."""
VERSION = '0.0.1'

METRIC_PREFIX = 'megaraid_'
METRIC_CONTROLLER_LABELS = '{{controller="{}", model="{}"}}'


def _escape_label_value(value):
    """Render value as text, escaped for a quoted Prometheus label value.

    The Prometheus text exposition format requires backslash, double quote
    and line feed inside a label value to be written as \\\\, \\" and \\n.
    """
    text = '{}'.format(value)
    # Backslashes must be doubled first, otherwise the backslashes added by
    # the later replacements would themselves be escaped again.
    return text.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')


def main(args):
    """Print MegaRAID controller health as Prometheus text-format samples.

    Runs StorCLI through get_storcli_json(), parses the JSON it prints and
    writes one sample per line to stdout: two global samples (status code and
    controller count), then the per-controller samples for every entry of
    'System Overview', then one 'controller_info' sample per controller.

    args: object exposing a `storcli_path` attribute (the parsed command-line
        arguments).

    Errors caused by malformed or incomplete StorCLI output are not handled
    here and propagate to the caller (e.g. ValueError from json.loads,
    KeyError for a missing field).
    """
    data = json.loads(get_storcli_json(args.storcli_path))

    # It appears that the data we need will always be present in the first
    # item in the Controllers array
    status = data['Controllers'][0]

    metrics = {
        'status_code': status['Command Status']['Status Code'],
        'controllers': status['Response Data']['Number of Controllers'],
    }

    # items() rather than iteritems(): the latter only exists on Python 2,
    # items() works on both Python 2.7 and Python 3.
    for name, value in metrics.items():
        print("{}{} {}".format(METRIC_PREFIX, name, value))

    controller_info = []

    # 'System Overview' may be absent; in that case only the two global
    # samples above are emitted.
    overview = status['Response Data'].get('System Overview', [])

    for controller in overview:
        controller_index = controller['Ctl']
        model = controller['Model']
        # The model name is free text coming from StorCLI, so it has to be
        # escaped before being embedded in a quoted label value.
        controller_info.append(METRIC_CONTROLLER_LABELS.format(
            _escape_label_value(controller_index),
            _escape_label_value(model)))

        controller_metrics = {
            # FIXME: Parse dimmer switch options
            # 'dimmer_switch': controller['DS'],

            'battery_backup_healthy': int(controller['BBU'] == 'Opt'),
            'degraded': int(controller['Hlth'] == 'Dgd'),
            'drive_groups': controller['DGs'],
            'emergency_hot_spare': int(controller['EHS'] == 'Y'),
            'failed': int(controller['Hlth'] == 'Fld'),
            'healthy': int(controller['Hlth'] == 'Opt'),
            'physical_drives': controller['PDs'],
            'ports': controller['Ports'],
            'scheduled_patrol_read': int(controller['sPR'] == 'On'),
            'virtual_drives': controller['VDs'],

            # Reverse StorCLI's logic to make metrics consistent
            'drive_groups_optimal': int(controller['DNOpt'] == 0),
            'virtual_drives_optimal': int(controller['VNOpt'] == 0),
        }

        for name, value in controller_metrics.items():
            print('{}{}{{controller="{}"}} {}'.format(
                METRIC_PREFIX, name, controller_index, value))

    # The info samples are printed after all per-controller samples.
    for labels in controller_info:
        print('{}{}{} {}'.format(METRIC_PREFIX, 'controller_info', labels, 1))


def get_storcli_json(storcli_path):
    """Run `<storcli_path> show all J` and return what it wrote to stdout.

    storcli_path: path of the StorCLI executable.

    Standard error is captured and discarded, and the exit status is not
    inspected. The output is returned exactly as read from the pipe, without
    decoding or parsing.
    """
    storcli_cmd = [storcli_path, 'show', 'all', 'J']
    proc = subprocess.Popen(storcli_cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    return proc.communicate()[0]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--storcli_path',
                        default='/opt/MegaRAID/storcli/storcli64',
                        help='path to StorCLi binary')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {}'.format(VERSION))
    args = parser.parse_args()

    main(args)