mirror of
https://github.com/ceph/ceph
synced 2024-12-22 03:22:00 +00:00
98236e3a1d
Mixin is a way to bundle dashboards, prometheus rules and alerts into jsonnet package. Shifting to mixin will allow easier integration with monitoring automation that some users may use. This commit moves `/monitoring/grafana/dashboards` and `/monitoring/prometheus` to `/monitoring/ceph-mixin`. Prometheus alerts was also converted to Jsonnet using an automated way (from yaml to json to jsonnet). This commit minimises any change made to the generated files and should not change neithers the dashboards nor the Prometheus alerts. In the future some configuration will also be added to jsonnet to add more functionalities to the dashboards or alerts (i.e.: multi cluster). Fixes: https://tracker.ceph.com/issues/53374 Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
1203 lines
36 KiB
JSON
1203 lines
36 KiB
JSON
{
|
|
"__inputs": [ ],
|
|
"__requires": [
|
|
{
|
|
"id": "grafana",
|
|
"name": "Grafana",
|
|
"type": "grafana",
|
|
"version": "5.3.2"
|
|
},
|
|
{
|
|
"id": "graph",
|
|
"name": "Graph",
|
|
"type": "panel",
|
|
"version": "5.0.0"
|
|
},
|
|
{
|
|
"id": "singlestat",
|
|
"name": "Singlestat",
|
|
"type": "panel",
|
|
"version": "5.0.0"
|
|
}
|
|
],
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": "-- Grafana --",
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"showIn": 0,
|
|
"tags": [ ],
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "",
|
|
"editable": false,
|
|
"gnetId": null,
|
|
"graphTooltip": 0,
|
|
"hideControls": false,
|
|
"id": null,
|
|
"links": [ ],
|
|
"panels": [
|
|
{
|
|
"collapse": false,
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 2,
|
|
"panels": [ ],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": true,
|
|
"title": "$ceph_hosts System Overview",
|
|
"titleSize": "h6",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"#299c46",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"#d44a3a"
|
|
],
|
|
"datasource": "$datasource",
|
|
"format": "none",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 3,
|
|
"interval": null,
|
|
"links": [ ],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "OSDs",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"aliasColors": {
|
|
"interrupt": "#447EBC",
|
|
"steal": "#6D1F62",
|
|
"system": "#890F02",
|
|
"user": "#3F6833",
|
|
"wait": "#C15C17"
|
|
},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 6,
|
|
"x": 3,
|
|
"y": 1
|
|
},
|
|
"id": 4,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [ ],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[1m]))\n) * 100",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{mode}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "CPU Utilization",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "percent",
|
|
"label": "% Utilization",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": {
|
|
"Available": "#508642",
|
|
"Free": "#508642",
|
|
"Total": "#bf1b00",
|
|
"Used": "#bf1b00",
|
|
"total": "#bf1b00",
|
|
"used": "#0a50a1"
|
|
},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 6,
|
|
"x": 9,
|
|
"y": 1
|
|
},
|
|
"id": 5,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "total",
|
|
"color": "#bf1b00",
|
|
"fill": 0,
|
|
"linewidth": 2,
|
|
"stack": false
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Free",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"} ",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "total",
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"expr": "(node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n(node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n(node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) \n",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "buffers/cache",
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"expr": "(node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})- (\n (node_memory_MemFree{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Cached{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) + \n (node_memory_Buffers{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"}) +\n (node_memory_Slab{instance=~\"$ceph_hosts([\\\\.:].*)?\"} or node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\.:].*)?\"})\n )\n \n",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "used",
|
|
"refId": "D"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "RAM Usage",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": "RAM used",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 6,
|
|
"x": 15,
|
|
"y": 1
|
|
},
|
|
"id": 6,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/.*tx/",
|
|
"transform": "negative-Y"
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum by (device) (\n irate(node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or \n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.rx",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\",device!=\"lo\"}[1m])\n)",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.tx",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Network Load",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "decbytes",
|
|
"label": "Send (-) / Receive (+)",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 21,
|
|
"y": 1
|
|
},
|
|
"id": 7,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/.*tx/",
|
|
"transform": "negative-Y"
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.rx",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.tx",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Network drop rate",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "pps",
|
|
"label": "Send (-) / Receive (+)",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"#299c46",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"#d44a3a"
|
|
],
|
|
"datasource": "$datasource",
|
|
"description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
|
|
"format": "bytes",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 6
|
|
},
|
|
"id": 8,
|
|
"interval": null,
|
|
"links": [ ],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\.:].*)?\"})",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": "",
|
|
"title": "Raw Capacity",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 21,
|
|
"y": 6
|
|
},
|
|
"id": 9,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/.*tx/",
|
|
"transform": "negative-Y"
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.rx",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\.:].*)?\"}[1m])",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}.tx",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "Network error rate",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "pps",
|
|
"label": "Send (-) / Receive (+)",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"collapse": false,
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 11
|
|
},
|
|
"id": 10,
|
|
"panels": [ ],
|
|
"repeat": null,
|
|
"repeatIteration": null,
|
|
"repeatRowId": null,
|
|
"showTitle": true,
|
|
"title": "OSD Disk Performance Statistics",
|
|
"titleSize": "h6",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 11,
|
|
"x": 0,
|
|
"y": 12
|
|
},
|
|
"id": 11,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/.*reads/",
|
|
"transform": "negative-Y"
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}}) writes",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "label_replace(\n (irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ),\n \"instance\",\n \"$1\",\n \"instance\",\n \"([^:.]*).*\"\n )",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}}) reads",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "$ceph_hosts Disk IOPS",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "ops",
|
|
"label": "Read (-) / Write (+)",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 11,
|
|
"x": 12,
|
|
"y": 12
|
|
},
|
|
"id": 12,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [
|
|
{
|
|
"alias": "/.*read/",
|
|
"transform": "negative-Y"
|
|
}
|
|
],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace((irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}}) write",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "label_replace((irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m])), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}}) read",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "$ceph_hosts Throughput by Disk",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": "Read (-) / Write (+)",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 11,
|
|
"x": 0,
|
|
"y": 21
|
|
},
|
|
"id": 13,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "null as zero",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [ ],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "max by(instance,device) (label_replace((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]), 0.001), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "$ceph_hosts Disk Latency",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "s",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"aliasColors": { },
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "$datasource",
|
|
"description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 11,
|
|
"x": 12,
|
|
"y": 21
|
|
},
|
|
"id": 14,
|
|
"legend": {
|
|
"alignAsTable": false,
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": null,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [ ],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"repeat": null,
|
|
"seriesOverrides": [ ],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\.:].*)?\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\")",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{device}}({{ceph_daemon}})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [ ],
|
|
"timeFrom": null,
|
|
"timeShift": null,
|
|
"title": "$ceph_hosts Disk utilization",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": [ ]
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "percent",
|
|
"label": "%Util",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"refresh": "10s",
|
|
"rows": [ ],
|
|
"schemaVersion": 16,
|
|
"style": "dark",
|
|
"tags": [
|
|
"overview"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {
|
|
"text": "default",
|
|
"value": "default"
|
|
},
|
|
"hide": 0,
|
|
"label": "Data Source",
|
|
"name": "datasource",
|
|
"options": [ ],
|
|
"query": "prometheus",
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "datasource"
|
|
},
|
|
{
|
|
"allValue": null,
|
|
"current": { },
|
|
"datasource": "$datasource",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "Hostname",
|
|
"multi": false,
|
|
"name": "ceph_hosts",
|
|
"options": [ ],
|
|
"query": "label_values(node_scrape_collector_success, instance) ",
|
|
"refresh": 1,
|
|
"regex": "([^.:]*).*",
|
|
"sort": 3,
|
|
"tagValuesQuery": "",
|
|
"tags": [ ],
|
|
"tagsQuery": "",
|
|
"type": "query",
|
|
"useTags": false
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"5s",
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d"
|
|
],
|
|
"time_options": [
|
|
"5m",
|
|
"15m",
|
|
"1h",
|
|
"6h",
|
|
"12h",
|
|
"24h",
|
|
"2d",
|
|
"7d",
|
|
"30d"
|
|
]
|
|
},
|
|
"timezone": "",
|
|
"title": "Host Details",
|
|
"uid": "rtOg0AiWz",
|
|
"version": 0
|
|
}
|