From 9719cc795e1d6a38ab8a7e8f3eeb56c13f11c25d Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Thu, 10 Mar 2022 17:50:43 +0530 Subject: [PATCH] mgr/dashboard: Pool overall performance shows multiple entries of same pool in pool overview This PR fixes the duplicate entries shown for each pool in the pool overview. Fixes: https://tracker.ceph.com/issues/54513 Signed-off-by: Aashish Sharma --- .../ceph-mixin/dashboards/cephfs.libsonnet | 4 +- .../ceph-mixin/dashboards/host.libsonnet | 47 ++++------- .../ceph-mixin/dashboards/osd.libsonnet | 55 ++++++------- .../ceph-mixin/dashboards/pool.libsonnet | 81 ++++++++++--------- .../ceph-mixin/dashboards/rbd.libsonnet | 23 +++--- .../ceph-mixin/dashboards/rgw.libsonnet | 46 ++++------- .../ceph-mixin/dashboards/utils.libsonnet | 7 +- .../dashboards_out/hosts-overview.json | 6 ++ .../dashboards_out/osds-overview.json | 25 +++--- .../dashboards_out/pool-overview.json | 14 ++++ .../dashboards_out/rbd-overview.json | 3 + 11 files changed, 148 insertions(+), 163 deletions(-) diff --git a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet index 3dabc1608ad..3d09a54536d 100644 --- a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet +++ b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet @@ -18,7 +18,7 @@ local u = import 'utils.libsonnet'; 1, '$datasource') .addTargets( - [u.addTargetSchema(expr, 1, 'time_series', legendFormat)] + [u.addTargetSchema(expr, legendFormat)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -80,8 +80,6 @@ local u = import 'utils.libsonnet'; ) .addTarget(u.addTargetSchema( 'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*"}[1m]))', - 1, - 'time_series', 'Write Ops' )) .addSeriesOverride( diff --git a/monitoring/ceph-mixin/dashboards/host.libsonnet b/monitoring/ceph-mixin/dashboards/host.libsonnet index b2ee5c94f00..06cb66d0a02 100644 --- a/monitoring/ceph-mixin/dashboards/host.libsonnet +++ b/monitoring/ceph-mixin/dashboards/host.libsonnet @@ -9,7 +9,7 @@ local u = import 'utils.libsonnet'; description, valueName, expr, - targetFormat, + instant, x, y, w, @@ -26,7 +26,7 @@ local u = import 'utils.libsonnet'; false, '') .addTarget( - u.addTargetSchema(expr, 1, targetFormat, '') + u.addTargetSchema(expr, '', 'time_series', 1, instant) ) + { gridPos: { x: x, y: y, w: w, h: h } }; local HostsOverviewGraphPanel(title, description, formatY1, expr, legendFormat, x, y, w, h) = @@ -35,7 +35,7 @@ local u = import 'utils.libsonnet'; ) .addTargets( [u.addTargetSchema( - expr, 1, 'time_series', legendFormat + expr, legendFormat )] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -126,7 +126,7 @@ local u = import 'utils.libsonnet'; '', 'current', 'count(sum by (hostname) (ceph_osd_metadata))', - 'time_series', + true, 0, 0, 4, @@ -138,7 +138,7 @@ local u = import 'utils.libsonnet'; 'Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster', 'current', 'avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )', - 'time_series', + true, 4, 0, 4, @@ -150,7 +150,7 @@ local u = import 'utils.libsonnet'; 'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)', 'current', 'avg (((node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})- (\n 
(node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) +\n (node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})\n )) /\n (node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"} ))', - 'time_series', + true, 8, 0, 4, @@ -162,7 +162,7 @@ local u = import 'utils.libsonnet'; 'IOPS Load at the device as reported by the OS on all OSD hosts', 'current', 'sum ((irate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[5m])))', - 'time_series', + true, 12, 0, 4, @@ -174,7 +174,7 @@ local u = import 'utils.libsonnet'; 'Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)', 'current', 'avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), "instance", "$1", "instance", "([^.:]*).*"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~"($osd_hosts).*"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^.:]*).*")\n)', - 'time_series', + true, 16, 0, 4, @@ -202,7 +202,7 @@ local u = import 'utils.libsonnet'; ) ||| , - 'time_series', + true, 20, 0, 4, @@ -242,7 +242,6 @@ local u = import 'utils.libsonnet'; description, valueName, expr, - targetFormat, x, y, w, @@ -258,10 +257,7 @@ local u = import 'utils.libsonnet'; false, false, '') - .addTarget(u.addTargetSchema(expr, - 1, - targetFormat, - '')) + { gridPos: { x: x, y: y, w: w, h: h } }; + .addTarget(u.addTargetSchema(expr)) + { gridPos: { x: x, y: y, w: w, h: h } }; local HostDetailsGraphPanel(alias, title, @@ -288,10 +284,7 @@ local u = import 'utils.libsonnet'; 1, '$datasource') .addTargets( - [u.addTargetSchema(expr, - 1, - 'time_series', - legendFormat)] + [u.addTargetSchema(expr, legendFormat)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -336,7 +329,6 @@ local u = import 'utils.libsonnet'; '', 'current', "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))", - 'time_series', 0, 1, 3, @@ -374,9 +366,9 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', 1, 'time_series', 'total'), - u.addTargetSchema('(node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n(node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n(node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or 
node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) \n', 1, 'time_series', 'buffers/cache'), - u.addTargetSchema('(node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})- (\n (node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n (node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})\n )\n \n', 1, 'time_series', 'used'), + u.addTargetSchema('node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', 'total'), + u.addTargetSchema('(node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n(node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n(node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) \n', 'buffers/cache'), + u.addTargetSchema('(node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})- (\n (node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n (node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})\n )\n \n', 'used'), ] ) .addSeriesOverride( @@ -404,7 +396,7 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', 1, 'time_series', '{{device}}.tx'), + u.addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', '{{device}}.tx'), ] ) .addSeriesOverride( @@ -427,7 +419,7 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema( - 'irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', 1, 'time_series', '{{device}}.tx' + 'irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx' ), ] ) @@ -443,7 +435,6 @@ local u = import 'utils.libsonnet'; 'Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.', 'current', 'sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?"})', - 'time_series', 0, 6, 3, @@ -465,7 +456,7 @@ local u = import 'utils.libsonnet'; ) .addTargets( [u.addTargetSchema( - 'irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', 1, 'time_series', '{{device}}.tx' + 'irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx' )] ) .addSeriesOverride( @@ -495,8 +486,6 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( 'label_replace(\n (irate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n "device",\n "$1",\n "device",\n "/dev/(.*)"\n ),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n )', - 1, - 'time_series', '{{device}}({{ceph_daemon}}) reads' ), ] @@ -521,8 +510,6 @@ local u = import 'utils.libsonnet'; .addTargets( [u.addTargetSchema( 'label_replace((irate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', - 1, - 'time_series', '{{device}}({{ceph_daemon}}) read' )] ) diff --git a/monitoring/ceph-mixin/dashboards/osd.libsonnet b/monitoring/ceph-mixin/dashboards/osd.libsonnet index 8b425fb395a..c5e614675e3 100644 --- a/monitoring/ceph-mixin/dashboards/osd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/osd.libsonnet @@ -35,7 +35,7 @@ local u = import 'utils.libsonnet'; 1, '$datasource') .addTargets( - [u.addTargetSchema(expr, 1, 'time_series', legendFormat1)] + [u.addTargetSchema(expr, legendFormat1)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; local OsdOverviewPieChartPanel(alias, description, title) = u.addPieChartSchema(alias, @@ -56,7 +56,6 @@ local u = import 'utils.libsonnet'; sparkLineShow, thresholds, expr, - targetFormat, x, y, w, @@ -75,7 +74,7 @@ local u = import 'utils.libsonnet'; thresholds ) .addTarget( - u.addTargetSchema(expr, 1, targetFormat, '') + u.addTargetSchema(expr) ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -137,12 +136,10 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( 'max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)', - 1, - 'time_series', 'MAX read' ), u.addTargetSchema( - 'quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)', 1, 'time_series', '@95%ile' + 'quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)', '@95%ile' ), ], ), @@ -160,7 +157,7 @@ local u = import 'utils.libsonnet'; ) .addTarget( u.addTargetSchema( - 'topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) 
irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n', 1, 'table', '' + 'topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n', '', 'table', 1, true ) ) + { gridPos: { x: 8, y: 0, w: 4, h: 8 } }, OsdOverviewGraphPanel( @@ -183,12 +180,10 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( 'max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)', - 1, - 'time_series', 'MAX write' ), u.addTargetSchema( - 'quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)', 1, 'time_series', '@95%ile write' + 'quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)', '@95%ile write' ), ], ), @@ -209,59 +204,60 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n', - 1, + '', 'table', - '' + 1, + true ) ) + { gridPos: { x: 20, y: 0, w: 4, h: 8 } }, OsdOverviewPieChartPanel( {}, '', 'OSD Types Summary' ) .addTarget( - u.addTargetSchema('count by (device_class) (ceph_osd_metadata)', 1, 'time_series', '{{device_class}}') + u.addTargetSchema('count by (device_class) (ceph_osd_metadata)', '{{device_class}}') ) + { gridPos: { x: 0, y: 8, w: 4, h: 8 } }, OsdOverviewPieChartPanel( { 'Non-Encrypted': '#E5AC0E' }, '', 'OSD Objectstore Types' ) .addTarget( u.addTargetSchema( - 'count(ceph_bluefs_wal_total_bytes)', 1, 'time_series', 'bluestore' + 'count(ceph_bluefs_wal_total_bytes)', 'bluestore', 'time_series', 2 ) ) .addTarget( u.addTargetSchema( - 'absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)', 1, 'time_series', 'filestore' + 'absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)', 'filestore', 'time_series', 2 ) ) + { gridPos: { x: 4, y: 8, w: 4, h: 8 } }, OsdOverviewPieChartPanel( {}, 'The pie chart shows the various OSD sizes used within the cluster', 'OSD Size Summary' ) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes < 1099511627776)', 1, 'time_series', '<1TB' + 'count(ceph_osd_stat_bytes < 1099511627776)', '<1TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)', 1, 'time_series', '<2TB' + 'count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)', '<2TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)', 1, 'time_series', '<3TB' + 'count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)', '<3TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)', 1, 'time_series', '<4TB' + 'count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)', '<4TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)', 1, 'time_series', '<6TB' + 'count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)', '<6TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)', 1, 'time_series', '<8TB' + 'count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)', '<8TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)', 1, 'time_series', '<10TB' + 'count(ceph_osd_stat_bytes >= 8796093022208 
< 10995116277760)', '<10TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)', 1, 'time_series', '<12TB' + 'count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)', '<12TB', 'time_series', 2 )) .addTarget(u.addTargetSchema( - 'count(ceph_osd_stat_bytes >= 13194139533312)', 1, 'time_series', '<12TB+' + 'count(ceph_osd_stat_bytes >= 13194139533312)', '<12TB+', 'time_series', 2 )) + { gridPos: { x: 8, y: 8, w: 4, h: 8 } }, g.graphPanel.new(bars=true, datasource='$datasource', @@ -275,7 +271,7 @@ local u = import 'utils.libsonnet'; min='0', nullPointMode='null') .addTarget(u.addTargetSchema( - 'ceph_osd_numpg\n', 1, 'time_series', 'PGs per OSD' + 'ceph_osd_numpg\n', 'PGs per OSD', 'time_series', 1, true )) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } }, OsdOverviewSingleStatPanel( ['#d44a3a', '#299c46'], @@ -289,7 +285,6 @@ local u = import 'utils.libsonnet'; false, '.75', 'sum(ceph_bluestore_onode_hits)/(sum(ceph_bluestore_onode_hits) + sum(ceph_bluestore_onode_misses))', - 'time_series', 20, 8, 4, @@ -313,7 +308,7 @@ local u = import 'utils.libsonnet'; 8 ) .addTargets([u.addTargetSchema( - 'round(sum(irate(ceph_pool_wr[30s])))', 1, 'time_series', 'Writes' + 'round(sum(irate(ceph_pool_wr[30s])))', 'Writes' )]), ]), 'osd-device-details.json': @@ -344,10 +339,8 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema(expr1, - 1, - 'time_series', legendFormat1), - u.addTargetSchema(expr2, 1, 'time_series', legendFormat2), + u.addTargetSchema(expr2, legendFormat2), ] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -524,8 +517,6 @@ local u = import 'utils.libsonnet'; ) .addTarget(u.addTargetSchema( 'label_replace(irate(node_disk_io_time_seconds_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")', - 1, - 'time_series', '{{device}} on {{instance}}' )) + { gridPos: { x: 18, y: 11, w: 6, h: 9 } }, ]), diff --git a/monitoring/ceph-mixin/dashboards/pool.libsonnet b/monitoring/ceph-mixin/dashboards/pool.libsonnet index 527c9124ba2..8fb4f815c74 100644 --- a/monitoring/ceph-mixin/dashboards/pool.libsonnet +++ b/monitoring/ceph-mixin/dashboards/pool.libsonnet @@ -9,6 +9,7 @@ local u = import 'utils.libsonnet'; description, valueName, expr, + instant, targetFormat, x, y, @@ -25,7 +26,7 @@ local u = import 'utils.libsonnet'; false, false, '') - .addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } }; + .addTarget(u.addTargetSchema(expr, '', targetFormat, 1, instant)) + { gridPos: { x: x, y: y, w: w, h: h } }; local PoolOverviewStyle(alias, pattern, @@ -55,7 +56,6 @@ local u = import 'utils.libsonnet'; formatY1, labelY1, expr, - targetFormat, legendFormat, x, y, @@ -75,8 +75,6 @@ local u = import 'utils.libsonnet'; '$datasource') .addTargets( [u.addTargetSchema(expr, - 1, - 'time_series', legendFormat)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -121,6 +119,7 @@ local u = import 'utils.libsonnet'; '', 'avg', 'count(ceph_pool_metadata)', + true, 'table', 0, 0, @@ -133,6 +132,7 @@ local u = import 'utils.libsonnet'; 'Count of the pools that have compression enabled', 'current', 'count(ceph_pool_metadata{compression_mode!="none"})', + null, '', 3, 0, @@ -145,6 +145,7 @@ local u = import 'utils.libsonnet'; 'Total raw capacity available to the cluster', 'current', 'sum(ceph_osd_stat_bytes)', + 
null, '', 6, 0, @@ -157,6 +158,7 @@ local u = import 'utils.libsonnet'; 'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)', 'current', 'sum(ceph_pool_bytes_used)', + true, '', 9, 0, @@ -169,6 +171,7 @@ local u = import 'utils.libsonnet'; 'Total of client data stored in the cluster', 'current', 'sum(ceph_pool_stored)', + true, '', 12, 0, @@ -181,6 +184,7 @@ local u = import 'utils.libsonnet'; 'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression', 'current', 'sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)', + null, '', 15, 0, @@ -193,6 +197,7 @@ local u = import 'utils.libsonnet'; 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n', 'current', '(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100', + null, 'table', 18, 0, @@ -205,6 +210,7 @@ local u = import 'utils.libsonnet'; 'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)', 'current', 'sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)', + null, '', 21, 0, @@ -244,62 +250,70 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( '(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)', - 1, + 'A', 'table', - 'A' + 1, + true ), u.addTargetSchema( 'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata', - 1, + 'B', 'table', - 'B' + 1, + true ), u.addTargetSchema( '((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100', - 1, + 'C', 'table', - 'C' + 1, + true ), u.addTargetSchema( '(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)', - 1, + 'D', 'table', - 'D' + 1, + true ), u.addTargetSchema( '(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)', - 1, + 'E', 'table', - 'E' + 1, + true ), u.addTargetSchema( - 'delta(ceph_pool_stored[5d])', 1, 'table', 'F' + 'delta(ceph_pool_stored[5d])', 'F', 'table', 1, true ), u.addTargetSchema( 'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])', - 1, + 'G', 'table', - 'G' + 1, + true ), u.addTargetSchema( 'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])', - 1, + 'H', 'table', - 'H' + 1, + true ), u.addTargetSchema( - 'ceph_pool_metadata', 1, 'table', 'I' + 'ceph_pool_metadata', 'I', 'table', 1, true ), u.addTargetSchema( 'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata', - 1, + 'J', 'table', - 'J' + 1, + true ), u.addTargetSchema( - 'ceph_pool_metadata{compression_mode!="none"}', 1, 'table', 'K' + 'ceph_pool_metadata{compression_mode!="none"}', 'K', 'table', 1, true ), - u.addTargetSchema('', '', '', 'L'), + u.addTargetSchema('', 'L', '', '', null), ] ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } }, PoolOverviewGraphPanel( @@ -308,7 +322,6 @@ local u = import 'utils.libsonnet'; 'short', 'IOPS', 'topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ', - 'time_series', '{{name}} ', 0, 9, @@ -318,8 +331,6 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'topk($topk,rate(ceph_pool_wr[30s]) + on(pool_id) 
group_left(instance,name) ceph_pool_metadata) ', - 1, - 'time_series', '{{name}} - write' ) ), @@ -329,7 +340,6 @@ local u = import 'utils.libsonnet'; 'Bps', 'Throughput', 'topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)', - 'time_series', '{{name}}', 12, 9, @@ -342,7 +352,6 @@ local u = import 'utils.libsonnet'; 'bytes', 'Capacity Used', 'ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata', - '', '{{name}}', 0, 17, @@ -377,7 +386,7 @@ local u = import 'utils.libsonnet'; gaugeShow, sparkLineShow, thresholds) - .addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } }; + .addTarget(u.addTargetSchema(expr, '', targetFormat)) + { gridPos: { x: x, y: y, w: w, h: h } }; local PoolDetailGraphPanel(alias, title, @@ -385,7 +394,6 @@ local u = import 'utils.libsonnet'; formatY1, labelY1, expr, - targetFormat, legendFormat, x, y, @@ -404,7 +412,7 @@ local u = import 'utils.libsonnet'; 1, '$datasource') .addTargets( - [u.addTargetSchema(expr, 1, 'time_series', legendFormat)] + [u.addTargetSchema(expr, legendFormat)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -503,7 +511,6 @@ local u = import 'utils.libsonnet'; 'ops', 'Objects out(-) / in(+) ', 'deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', - 'time_series', 'Objects per second', 12, 0, @@ -514,12 +521,12 @@ local u = import 'utils.libsonnet'; { read_op_per_sec: '#3F6833', write_op_per_sec: '#E5AC0E', - }, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'time_series', 'reads', 0, 7, 12, 7 + }, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'reads', 0, 7, 12, 7 ) .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' }) .addTarget( u.addTargetSchema( - 'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 1, 'time_series', 'writes' + 'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'writes' ) ), PoolDetailGraphPanel( @@ -532,7 +539,6 @@ local u = import 'utils.libsonnet'; 'Bps', 'Read (-) / Write (+)', 'irate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', - 'time_series', 'reads', 12, 7, @@ -543,8 +549,6 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'irate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', - 1, - 'time_series', 'writes' ) ), @@ -558,7 +562,6 @@ local u = import 'utils.libsonnet'; 'short', 'Objects', 'ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', - 'time_series', 'Number of Objects', 0, 14, diff --git a/monitoring/ceph-mixin/dashboards/rbd.libsonnet b/monitoring/ceph-mixin/dashboards/rbd.libsonnet index d464f889f54..a4ca6982d3b 100644 --- a/monitoring/ceph-mixin/dashboards/rbd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rbd.libsonnet @@ -20,10 +20,8 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema(expr1, - 1, - 'time_series', '{{pool}} Write'), - u.addTargetSchema(expr2, 1, 'time_series', '{{pool}} Read'), + u.addTargetSchema(expr2, '{{pool}} 
Read'), ] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -151,12 +149,8 @@ local u = import 'utils.libsonnet'; .addTargets( [ u.addTargetSchema(expr1, - 1, - 'time_series', legendFormat1), u.addTargetSchema(expr2, - 1, - 'time_series', legendFormat2), ] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -257,9 +251,10 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))', - 1, + '', 'table', - '' + 1, + true ) ) + { gridPos: { x: 0, y: 7, w: 8, h: 7 } }, u.addTableSchema( @@ -278,9 +273,10 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))', - 1, + '', 'table', - '' + 1, + true ) ) + { gridPos: { x: 8, y: 7, w: 8, h: 7 } }, u.addTableSchema( @@ -299,9 +295,10 @@ local u = import 'utils.libsonnet'; .addTarget( u.addTargetSchema( 'topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)', - 1, + '', 'table', - '' + 1, + true ) ) + { gridPos: { x: 16, y: 7, w: 8, h: 7 } }, ]), diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet index e0ad25fb59b..f7f76187f14 100644 --- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet @@ -19,8 +19,6 @@ local u = import 'utils.libsonnet'; '$datasource') .addTargets( [u.addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric, - 1, - 'time_series', '{{source_zone}}')] ) + { gridPos: { x: x, y: y, w: w, h: h } }; @@ -144,7 +142,7 @@ local u = import 'utils.libsonnet'; legend_values ) .addTargets( - [u.addTargetSchema(expr1, 1, 'time_series', legendFormat1)] + [u.addTargetSchema(expr1, legendFormat1)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -242,8 +240,6 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( 'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata', - 1, - 'time_series', 'PUT AVG' ), ] @@ -285,8 +281,6 @@ local u = import 'utils.libsonnet'; 6 ).addTargets( [u.addTargetSchema('sum(rate(ceph_rgw_put_b[30s]))', - 1, - 'time_series', 'PUTs')] ), RgwOverviewPanel( @@ -336,7 +330,7 @@ local u = import 'utils.libsonnet'; true ) .addTargets( - [u.addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 1, 'time_series', 'Backend {{ code }}')] + [u.addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 'Backend {{ code }}')] ) .addSeriesOverride([ { @@ -371,12 +365,12 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Response errors'), - u.addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Requests errors'), - u.addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by 
(instance)', 2, 'time_series', 'Backend redispatch'), - u.addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Backend retry'), - u.addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Request denied'), - u.addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 2, 'time_series', 'Backend Queued'), + u.addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Response errors', 'time_series', 2), + u.addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Requests errors'), + u.addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend redispatch', 'time_series', 2), + u.addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend retry', 'time_series', 2), + u.addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Request denied', 'time_series', 2), + u.addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 'Backend Queued', 'time_series', 2), ] ) .addSeriesOverride([ @@ -410,8 +404,8 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back'), - u.addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back errors'), + u.addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back'), + u.addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back errors'), ] ) .addSeriesOverride([ @@ -441,9 +435,9 @@ local u = import 'utils.libsonnet'; ) .addTargets( [ - u.addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Front'), - u.addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'IN Back'), - u.addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Back'), + u.addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Front', 'time_series', 2), + u.addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'IN Back', 'time_series', 2), + u.addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Back', 'time_series', 2), ] ) .addSeriesOverride([ @@ -480,7 +474,7 @@ local u = import 'utils.libsonnet'; 1, '$datasource') .addTargets( - 
[u.addTargetSchema(expr1, 1, 'time_series', legendFormat1), u.addTargetSchema(expr2, 1, 'time_series', legendFormat2)] + [u.addTargetSchema(expr1, legendFormat1), u.addTargetSchema(expr2, legendFormat2)] ) + { gridPos: { x: x, y: y, w: w, h: h } }; u.dashboardSchema( @@ -593,14 +587,10 @@ local u = import 'utils.libsonnet'; [ u.addTargetSchema( 'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'PUTs {{ceph_daemon}}' ), u.addTargetSchema( '(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'Other {{ceph_daemon}}' ), ] @@ -616,26 +606,18 @@ local u = import 'utils.libsonnet'; ) .addTarget(u.addTargetSchema( 'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'Failures {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( 'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'GETs {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( 'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'PUTs {{ceph_daemon}}' )) .addTarget(u.addTargetSchema( '(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', - 1, - 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}' )) + { gridPos: { x: 20, y: 1, w: 4, h: 8 } }, ]), diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index f31c0ffe578..1f25d370c2a 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -58,11 +58,12 @@ local g = import 'grafonnet/grafana.libsonnet'; legend_values=legend_values), - addTargetSchema(expr, intervalFactor, format, legendFormat):: + addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null):: g.prometheus.target(expr=expr, - intervalFactor=intervalFactor, + legendFormat=legendFormat, format=format, - legendFormat=legendFormat), + intervalFactor=intervalFactor, + instant=instant), addTemplateSchema(name, datasource, diff --git a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json index 462ddf37bda..3572d7ad413 100644 --- a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json @@ -106,6 +106,7 @@ { "expr": "count(sum by (hostname) (ceph_osd_metadata))", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -188,6 +189,7 @@ { "expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -270,6 +272,7 @@ { "expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or 
node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -352,6 +355,7 @@ { "expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -434,6 +438,7 @@ { "expr": "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -516,6 +521,7 @@ { "expr": "sum (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n )\n", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json index dc05689ecb4..ffcf0601563 100644 --- a/monitoring/ceph-mixin/dashboards_out/osds-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -224,6 +224,7 @@ { "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -410,6 +411,7 @@ { "expr": "topk(10,\n (sort(\n 
(irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -476,14 +478,14 @@ { "expr": "count(ceph_bluefs_wal_total_bytes)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "bluestore", "refId": "A" }, { "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "filestore", "refId": "B" } @@ -514,63 +516,63 @@ { "expr": "count(ceph_osd_stat_bytes < 1099511627776)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<1TB", "refId": "A" }, { "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<2TB", "refId": "B" }, { "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<3TB", "refId": "C" }, { "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<4TB", "refId": "D" }, { "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<6TB", "refId": "E" }, { "expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<8TB", "refId": "F" }, { "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<10TB", "refId": "G" }, { "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<12TB", "refId": "H" }, { "expr": "count(ceph_osd_stat_bytes >= 13194139533312)", "format": "time_series", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "<12TB+", "refId": "I" } @@ -623,6 +625,7 @@ { "expr": "ceph_osd_numpg\n", "format": "time_series", + "instant": true, "intervalFactor": 1, "legendFormat": "PGs per OSD", "refId": "A" diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json index d70d4c7ae02..5767d40eb1a 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -87,6 +87,7 @@ { "expr": "count(ceph_pool_metadata)", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -333,6 +334,7 @@ { "expr": "sum(ceph_pool_bytes_used)", "format": "", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -415,6 +417,7 @@ { "expr": "sum(ceph_pool_stored)", "format": "", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -1052,6 +1055,7 @@ { "expr": "(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "A", "refId": "A" @@ -1059,6 +1063,7 @@ { "expr": "ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata", "format": "table", + "instant": true, 
"intervalFactor": 1, "legendFormat": "B", "refId": "B" @@ -1066,6 +1071,7 @@ { "expr": "((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "C", "refId": "C" @@ -1073,6 +1079,7 @@ { "expr": "(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "D", "refId": "D" @@ -1080,6 +1087,7 @@ { "expr": "(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "E", "refId": "E" @@ -1087,6 +1095,7 @@ { "expr": "delta(ceph_pool_stored[5d])", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "F", "refId": "F" @@ -1094,6 +1103,7 @@ { "expr": "rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "G", "refId": "G" @@ -1101,6 +1111,7 @@ { "expr": "rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "H", "refId": "H" @@ -1108,6 +1119,7 @@ { "expr": "ceph_pool_metadata", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "I", "refId": "I" @@ -1115,6 +1127,7 @@ { "expr": "ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "J", "refId": "J" @@ -1122,6 +1135,7 @@ { "expr": "ceph_pool_metadata{compression_mode!=\"none\"}", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "K", "refId": "K" diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json index 71c32ce71fb..29b82afa523 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json @@ -418,6 +418,7 @@ { "expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -519,6 +520,7 @@ { "expr": "topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" @@ -620,6 +622,7 @@ { "expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)", "format": "table", + "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A"