ceph/monitoring/ceph-mixin/dashboards_out/osds-overview.json
Arthur Outhenin-Chalandre 98236e3a1d
mgr/dashboard: monitoring: refactor into ceph-mixin
A mixin is a way to bundle dashboards, Prometheus rules and alerts into
a jsonnet package. Shifting to a mixin will allow easier integration with
monitoring automation that some users may use.

This commit moves `/monitoring/grafana/dashboards` and
`/monitoring/prometheus` to `/monitoring/ceph-mixin`. Prometheus alerts
were also converted to Jsonnet in an automated way (from YAML to JSON
to Jsonnet). This commit minimises any change made to the generated files
and should change neither the dashboards nor the Prometheus alerts.

In the future some configuration will also be added to jsonnet to add
more functionality to the dashboards or alerts (e.g. multi-cluster support).

Fixes: https://tracker.ceph.com/issues/53374
Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
2022-02-03 13:08:20 +01:00

918 lines
25 KiB
JSON

{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.0.0"
},
{
"id": "grafana-piechart-panel",
"name": "Pie Chart",
"type": "panel",
"version": "1.3.3"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
},
{
"id": "table",
"name": "Table",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"aliasColors": {
"@95%ile": "#e0752d"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG read",
"refId": "A"
},
{
"expr": "max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX read",
"refId": "B"
},
{
"expr": "quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "OSD Read Latencies",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
]
},
{
"columns": [ ],
"datasource": "$datasource",
"description": "This table shows the OSDs that are delivering the 10 highest read latencies within the cluster",
"gridPos": {
"h": 8,
"w": 4,
"x": 8,
"y": 0
},
"id": 3,
"links": [ ],
"sort": {
"col": 2,
"desc": true
},
"styles": [
{
"alias": "OSD ID",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "ceph_daemon",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Latency (ms)",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "none",
"valueMaps": [ ]
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [ ],
"type": "hidden",
"unit": "short",
"valueMaps": [ ]
}
],
"targets": [
{
"expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n",
"format": "table",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Highest READ Latencies",
"transform": "table",
"type": "table"
},
{
"aliasColors": {
"@95%ile write": "#e0752d"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 12,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "avg (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG write",
"refId": "A"
},
{
"expr": "max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX write",
"refId": "B"
},
{
"expr": "quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile write",
"refId": "C"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "OSD Write Latencies",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
]
},
{
"columns": [ ],
"datasource": "$datasource",
"description": "This table shows the OSDs that are delivering the 10 highest write latencies within the cluster",
"gridPos": {
"h": 8,
"w": 4,
"x": 20,
"y": 0
},
"id": 5,
"links": [ ],
"sort": {
"col": 2,
"desc": true
},
"styles": [
{
"alias": "OSD ID",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "ceph_daemon",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Latency (ms)",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "none",
"valueMaps": [ ]
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [ ],
"type": "hidden",
"unit": "short",
"valueMaps": [ ]
}
],
"targets": [
{
"expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n",
"format": "table",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Highest WRITE Latencies",
"transform": "table",
"type": "table"
},
{
"aliasColors": { },
"datasource": "$datasource",
"description": "",
"gridPos": {
"h": 8,
"w": 4,
"x": 0,
"y": 8
},
"id": 6,
"legend": {
"percentage": true,
"show": true,
"values": true
},
"legendType": "Under graph",
"pieType": "pie",
"targets": [
{
"expr": "count by (device_class) (ceph_osd_metadata)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device_class}}",
"refId": "A"
}
],
"title": "OSD Types Summary",
"type": "grafana-piechart-panel",
"valueName": "current"
},
{
"aliasColors": {
"Non-Encrypted": "#E5AC0E"
},
"datasource": "$datasource",
"description": "",
"gridPos": {
"h": 8,
"w": 4,
"x": 4,
"y": 8
},
"id": 7,
"legend": {
"percentage": true,
"show": true,
"values": true
},
"legendType": "Under graph",
"pieType": "pie",
"targets": [
{
"expr": "count(ceph_bluefs_wal_total_bytes)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "bluestore",
"refId": "A"
},
{
"expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "filestore",
"refId": "B"
}
],
"title": "OSD Objectstore Types",
"type": "grafana-piechart-panel",
"valueName": "current"
},
{
"aliasColors": { },
"datasource": "$datasource",
"description": "The pie chart shows the various OSD sizes used within the cluster",
"gridPos": {
"h": 8,
"w": 4,
"x": 8,
"y": 8
},
"id": 8,
"legend": {
"percentage": true,
"show": true,
"values": true
},
"legendType": "Under graph",
"pieType": "pie",
"targets": [
{
"expr": "count(ceph_osd_stat_bytes < 1099511627776)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<1TB",
"refId": "A"
},
{
"expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<2TB",
"refId": "B"
},
{
"expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<3TB",
"refId": "C"
},
{
"expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<4TB",
"refId": "D"
},
{
"expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<6TB",
"refId": "E"
},
{
"expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<8TB",
"refId": "F"
},
{
"expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<10TB",
"refId": "G"
},
{
"expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "<12TB",
"refId": "H"
},
{
"expr": "count(ceph_osd_stat_bytes >= 13194139533312)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "12TB+",
"refId": "I"
}
],
"title": "OSD Size Summary",
"type": "grafana-piechart-panel",
"valueName": "current"
},
{
"aliasColors": { },
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 8,
"x": 12,
"y": 8
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "ceph_osd_numpg\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PGs per OSD",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Distribution of PGs per OSD",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": 20,
"mode": "histogram",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "# of OSDs",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
]
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"#d44a3a",
"#299c46"
],
"datasource": "$datasource",
"description": "This gauge panel shows the onode hits ratio to help determine whether increasing RAM per OSD could improve the performance of the cluster",
"format": "percentunit",
"gauge": {
"maxValue": 1,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 8,
"w": 4,
"x": 20,
"y": 8
},
"id": 10,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "sum(ceph_bluestore_onode_hits)/(sum(ceph_bluestore_onode_hits) + sum(ceph_bluestore_onode_misses))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": ".75",
"title": "OSD onode Hits Ratio",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 11,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "R/W Profile",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"description": "Show the read/write workload profile over time",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 17
},
"id": 12,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "round(sum(irate(ceph_pool_rd[30s])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
"expr": "round(sum(irate(ceph_pool_wr[30s])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Read/Write Profile",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"refresh": "10s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [ ],
"templating": {
"list": [
{
"current": {
"text": "default",
"value": "default"
},
"hide": 0,
"label": "Data Source",
"name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "OSD Overview",
"uid": "lo02I1Aiz",
"version": 0
}