Merge pull request #45335 from rhcs-dashboard/fix-54513-master

mgr/dashboard: Pool overall performance shows multiple entries of the same pool in the pool overview

Reviewed-by: Aashish Sharma <aasharma@redhat.com>
Reviewed-by: Avan Thakkar <athakkar@redhat.com>
Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Pere Diaz Bou <pdiazbou@redhat.com>
Reviewed-by: sunilangadi2 <NOT@FOUND>
Merged by Ernesto Puerta on 2022-03-30 14:05:38 +02:00 (committed by GitHub), commit 043f7953d8
11 changed files with 148 additions and 163 deletions
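
In short: the addTargetSchema helper in utils.libsonnet (diffed below) changes from the positional addTargetSchema(expr, intervalFactor, format, legendFormat) to addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null), and every call site is rewritten to match. The table panels behind the pool overview now pass instant=true, which appears to be the actual fix for the duplicated rows: an instant query returns one sample per series, so each pool renders as a single table row. The three call shapes the diff settles on:

    // before: every caller spelled out intervalFactor and format
    u.addTargetSchema(expr, 1, 'time_series', legendFormat)
    // after: defaults cover the common time-series case
    u.addTargetSchema(expr, legendFormat)
    // table panels opt in to a one-shot (instant) query
    u.addTargetSchema(expr, 'A', 'table', 1, true)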

@@ -18,7 +18,7 @@ local u = import 'utils.libsonnet';
1,
'$datasource')
.addTargets(
[u.addTargetSchema(expr, 1, 'time_series', legendFormat)]
[u.addTargetSchema(expr, legendFormat)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -80,8 +80,6 @@ local u = import 'utils.libsonnet';
)
.addTarget(u.addTargetSchema(
'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*"}[1m]))',
1,
'time_series',
'Write Ops'
))
.addSeriesOverride(

@@ -9,7 +9,7 @@ local u = import 'utils.libsonnet';
description,
valueName,
expr,
targetFormat,
instant,
x,
y,
w,
@@ -26,7 +26,7 @@ local u = import 'utils.libsonnet';
false,
'')
.addTarget(
u.addTargetSchema(expr, 1, targetFormat, '')
u.addTargetSchema(expr, '', 'time_series', 1, instant)
) + { gridPos: { x: x, y: y, w: w, h: h } };
local HostsOverviewGraphPanel(title, description, formatY1, expr, legendFormat, x, y, w, h) =
@@ -35,7 +35,7 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[u.addTargetSchema(
expr, 1, 'time_series', legendFormat
expr, legendFormat
)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -126,7 +126,7 @@ local u = import 'utils.libsonnet';
'',
'current',
'count(sum by (hostname) (ceph_osd_metadata))',
'time_series',
true,
0,
0,
4,
@@ -138,7 +138,7 @@ local u = import 'utils.libsonnet';
'Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster',
'current',
'avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )',
'time_series',
true,
4,
0,
4,
@@ -150,7 +150,7 @@ local u = import 'utils.libsonnet';
'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)',
'current',
'avg (((node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})- (\n (node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) + \n (node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) +\n (node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"})\n )) /\n (node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"} ))',
'time_series',
true,
8,
0,
4,
@@ -162,7 +162,7 @@ local u = import 'utils.libsonnet';
'IOPS Load at the device as reported by the OS on all OSD hosts',
'current',
'sum ((irate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[5m]) or irate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[5m])))',
'time_series',
true,
12,
0,
4,
@@ -174,7 +174,7 @@ local u = import 'utils.libsonnet';
'Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)',
'current',
'avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), "instance", "$1", "instance", "([^.:]*).*"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~"($osd_hosts).*"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^.:]*).*")\n)',
'time_series',
true,
16,
0,
4,
@@ -202,7 +202,7 @@ local u = import 'utils.libsonnet';
)
|||
,
'time_series',
true,
20,
0,
4,
@@ -242,7 +242,6 @@ local u = import 'utils.libsonnet';
description,
valueName,
expr,
targetFormat,
x,
y,
w,
@@ -258,10 +257,7 @@ local u = import 'utils.libsonnet';
false,
false,
'')
.addTarget(u.addTargetSchema(expr,
1,
targetFormat,
'')) + { gridPos: { x: x, y: y, w: w, h: h } };
.addTarget(u.addTargetSchema(expr)) + { gridPos: { x: x, y: y, w: w, h: h } };
local HostDetailsGraphPanel(alias,
title,
@@ -288,10 +284,7 @@ local u = import 'utils.libsonnet';
1,
'$datasource')
.addTargets(
[u.addTargetSchema(expr,
1,
'time_series',
legendFormat)]
[u.addTargetSchema(expr, legendFormat)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -336,7 +329,6 @@ local u = import 'utils.libsonnet';
'',
'current',
"count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
'time_series',
0,
1,
3,
@@ -374,9 +366,9 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[
u.addTargetSchema('node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', 1, 'time_series', 'total'),
u.addTargetSchema('(node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n(node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n(node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) \n', 1, 'time_series', 'buffers/cache'),
u.addTargetSchema('(node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})- (\n (node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n (node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})\n )\n \n', 1, 'time_series', 'used'),
u.addTargetSchema('node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} ', 'total'),
u.addTargetSchema('(node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n(node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n(node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) \n', 'buffers/cache'),
u.addTargetSchema('(node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})- (\n (node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) + \n (node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}) +\n (node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"})\n )\n \n', 'used'),
]
)
.addSeriesOverride(
@@ -404,7 +396,7 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[
u.addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', 1, 'time_series', '{{device}}.tx'),
u.addTargetSchema('sum by (device) (\n irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])\n)', '{{device}}.tx'),
]
)
.addSeriesOverride(
@@ -427,7 +419,7 @@ local u = import 'utils.libsonnet';
.addTargets(
[
u.addTargetSchema(
'irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', 1, 'time_series', '{{device}}.tx'
'irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx'
),
]
)
@@ -443,7 +435,6 @@ local u = import 'utils.libsonnet';
'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.',
'current',
'sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?"})',
'time_series',
0,
6,
3,
@@ -465,7 +456,7 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[u.addTargetSchema(
'irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', 1, 'time_series', '{{device}}.tx'
'irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])', '{{device}}.tx'
)]
)
.addSeriesOverride(
@@ -495,8 +486,6 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'label_replace(\n (irate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n)\n* on(instance, device) group_left(ceph_daemon)\n label_replace(\n label_replace(\n ceph_disk_occupation_human,\n "device",\n "$1",\n "device",\n "/dev/(.*)"\n ),\n "instance",\n "$1",\n "instance",\n "([^:.]*).*"\n )',
1,
'time_series',
'{{device}}({{ceph_daemon}}) reads'
),
]
@@ -521,8 +510,6 @@ local u = import 'utils.libsonnet';
.addTargets(
[u.addTargetSchema(
'label_replace((irate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or irate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")',
1,
'time_series',
'{{device}}({{ceph_daemon}}) read'
)]
)

@@ -35,7 +35,7 @@ local u = import 'utils.libsonnet';
1,
'$datasource')
.addTargets(
[u.addTargetSchema(expr, 1, 'time_series', legendFormat1)]
[u.addTargetSchema(expr, legendFormat1)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
local OsdOverviewPieChartPanel(alias, description, title) =
u.addPieChartSchema(alias,
@@ -56,7 +56,6 @@ local u = import 'utils.libsonnet';
sparkLineShow,
thresholds,
expr,
targetFormat,
x,
y,
w,
@@ -75,7 +74,7 @@ local u = import 'utils.libsonnet';
thresholds
)
.addTarget(
u.addTargetSchema(expr, 1, targetFormat, '')
u.addTargetSchema(expr)
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -137,12 +136,10 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)',
1,
'time_series',
'MAX read'
),
u.addTargetSchema(
'quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)', 1, 'time_series', '@95%ile'
'quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)', '@95%ile'
),
],
),
@@ -160,7 +157,7 @@ local u = import 'utils.libsonnet';
)
.addTarget(
u.addTargetSchema(
'topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n', 1, 'table', ''
'topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n', '', 'table', 1, true
)
) + { gridPos: { x: 8, y: 0, w: 4, h: 8 } },
OsdOverviewGraphPanel(
@@ -183,12 +180,10 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)',
1,
'time_series',
'MAX write'
),
u.addTargetSchema(
'quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)', 1, 'time_series', '@95%ile write'
'quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)', '@95%ile write'
),
],
),
@@ -209,59 +204,60 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n',
1,
'',
'table',
''
1,
true
)
) + { gridPos: { x: 20, y: 0, w: 4, h: 8 } },
OsdOverviewPieChartPanel(
{}, '', 'OSD Types Summary'
)
.addTarget(
u.addTargetSchema('count by (device_class) (ceph_osd_metadata)', 1, 'time_series', '{{device_class}}')
u.addTargetSchema('count by (device_class) (ceph_osd_metadata)', '{{device_class}}')
) + { gridPos: { x: 0, y: 8, w: 4, h: 8 } },
OsdOverviewPieChartPanel(
{ 'Non-Encrypted': '#E5AC0E' }, '', 'OSD Objectstore Types'
)
.addTarget(
u.addTargetSchema(
'count(ceph_bluefs_wal_total_bytes)', 1, 'time_series', 'bluestore'
'count(ceph_bluefs_wal_total_bytes)', 'bluestore', 'time_series', 2
)
)
.addTarget(
u.addTargetSchema(
'absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)', 1, 'time_series', 'filestore'
'absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)', 'filestore', 'time_series', 2
)
) + { gridPos: { x: 4, y: 8, w: 4, h: 8 } },
OsdOverviewPieChartPanel(
{}, 'The pie chart shows the various OSD sizes used within the cluster', 'OSD Size Summary'
)
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes < 1099511627776)', 1, 'time_series', '<1TB'
'count(ceph_osd_stat_bytes < 1099511627776)', '<1TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)', 1, 'time_series', '<2TB'
'count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)', '<2TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)', 1, 'time_series', '<3TB'
'count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)', '<3TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)', 1, 'time_series', '<4TB'
'count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)', '<4TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)', 1, 'time_series', '<6TB'
'count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)', '<6TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)', 1, 'time_series', '<8TB'
'count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)', '<8TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)', 1, 'time_series', '<10TB'
'count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)', '<10TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)', 1, 'time_series', '<12TB'
'count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)', '<12TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
'count(ceph_osd_stat_bytes >= 13194139533312)', 1, 'time_series', '<12TB+'
'count(ceph_osd_stat_bytes >= 13194139533312)', '<12TB+', 'time_series', 2
)) + { gridPos: { x: 8, y: 8, w: 4, h: 8 } },
g.graphPanel.new(bars=true,
datasource='$datasource',
@@ -275,7 +271,7 @@ local u = import 'utils.libsonnet';
min='0',
nullPointMode='null')
.addTarget(u.addTargetSchema(
'ceph_osd_numpg\n', 1, 'time_series', 'PGs per OSD'
'ceph_osd_numpg\n', 'PGs per OSD', 'time_series', 1, true
)) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } },
OsdOverviewSingleStatPanel(
['#d44a3a', '#299c46'],
@@ -289,7 +285,6 @@ local u = import 'utils.libsonnet';
false,
'.75',
'sum(ceph_bluestore_onode_hits)/(sum(ceph_bluestore_onode_hits) + sum(ceph_bluestore_onode_misses))',
'time_series',
20,
8,
4,
@@ -313,7 +308,7 @@ local u = import 'utils.libsonnet';
8
)
.addTargets([u.addTargetSchema(
'round(sum(irate(ceph_pool_wr[30s])))', 1, 'time_series', 'Writes'
'round(sum(irate(ceph_pool_wr[30s])))', 'Writes'
)]),
]),
'osd-device-details.json':
@@ -344,10 +339,8 @@ local u = import 'utils.libsonnet';
.addTargets(
[
u.addTargetSchema(expr1,
1,
'time_series',
legendFormat1),
u.addTargetSchema(expr2, 1, 'time_series', legendFormat2),
u.addTargetSchema(expr2, legendFormat2),
]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -524,8 +517,6 @@ local u = import 'utils.libsonnet';
)
.addTarget(u.addTargetSchema(
'label_replace(irate(node_disk_io_time_seconds_total[1m]), "instance", "$1", "instance", "([^:.]*).*") and on (instance, device) label_replace(label_replace(ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*")',
1,
'time_series',
'{{device}} on {{instance}}'
)) + { gridPos: { x: 18, y: 11, w: 6, h: 9 } },
]),

@@ -9,6 +9,7 @@ local u = import 'utils.libsonnet';
description,
valueName,
expr,
instant,
targetFormat,
x,
y,
@@ -25,7 +26,7 @@ local u = import 'utils.libsonnet';
false,
false,
'')
.addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } };
.addTarget(u.addTargetSchema(expr, '', targetFormat, 1, instant)) + { gridPos: { x: x, y: y, w: w, h: h } };
local PoolOverviewStyle(alias,
pattern,
@@ -55,7 +56,6 @@ local u = import 'utils.libsonnet';
formatY1,
labelY1,
expr,
targetFormat,
legendFormat,
x,
y,
@@ -75,8 +75,6 @@ local u = import 'utils.libsonnet';
'$datasource')
.addTargets(
[u.addTargetSchema(expr,
1,
'time_series',
legendFormat)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -121,6 +119,7 @@ local u = import 'utils.libsonnet';
'',
'avg',
'count(ceph_pool_metadata)',
true,
'table',
0,
0,
@@ -133,6 +132,7 @@ local u = import 'utils.libsonnet';
'Count of the pools that have compression enabled',
'current',
'count(ceph_pool_metadata{compression_mode!="none"})',
null,
'',
3,
0,
@@ -145,6 +145,7 @@ local u = import 'utils.libsonnet';
'Total raw capacity available to the cluster',
'current',
'sum(ceph_osd_stat_bytes)',
null,
'',
6,
0,
@@ -157,6 +158,7 @@ local u = import 'utils.libsonnet';
'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
'current',
'sum(ceph_pool_bytes_used)',
true,
'',
9,
0,
@@ -169,6 +171,7 @@ local u = import 'utils.libsonnet';
'Total of client data stored in the cluster',
'current',
'sum(ceph_pool_stored)',
true,
'',
12,
0,
@@ -181,6 +184,7 @@ local u = import 'utils.libsonnet';
'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
'current',
'sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)',
null,
'',
15,
0,
@@ -193,6 +197,7 @@ local u = import 'utils.libsonnet';
'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data\n',
'current',
'(sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)) * 100',
null,
'table',
18,
0,
@@ -205,6 +210,7 @@ local u = import 'utils.libsonnet';
'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
'current',
'sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)',
null,
'',
21,
0,
@@ -244,62 +250,70 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)',
1,
'A',
'table',
'A'
1,
true
),
u.addTargetSchema(
'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata',
1,
'B',
'table',
'B'
1,
true
),
u.addTargetSchema(
'((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100',
1,
'C',
'table',
'C'
1,
true
),
u.addTargetSchema(
'(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)',
1,
'D',
'table',
'D'
1,
true
),
u.addTargetSchema(
'(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)',
1,
'E',
'table',
'E'
1,
true
),
u.addTargetSchema(
'delta(ceph_pool_stored[5d])', 1, 'table', 'F'
'delta(ceph_pool_stored[5d])', 'F', 'table', 1, true
),
u.addTargetSchema(
'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])',
1,
'G',
'table',
'G'
1,
true
),
u.addTargetSchema(
'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])',
1,
'H',
'table',
'H'
1,
true
),
u.addTargetSchema(
'ceph_pool_metadata', 1, 'table', 'I'
'ceph_pool_metadata', 'I', 'table', 1, true
),
u.addTargetSchema(
'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata',
1,
'J',
'table',
'J'
1,
true
),
u.addTargetSchema(
'ceph_pool_metadata{compression_mode!="none"}', 1, 'table', 'K'
'ceph_pool_metadata{compression_mode!="none"}', 'K', 'table', 1, true
),
u.addTargetSchema('', '', '', 'L'),
u.addTargetSchema('', 'L', '', '', null),
]
) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } },
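
Every one of the table targets above (refIds A through K) now ends with 1, true, i.e. intervalFactor=1, instant=true; only the empty placeholder target L keeps instant=null. The instant flag is the crux of the fix: a range query returns a sample per step for each matching series, and the table panel renders each sample as a row, so a single pool could show up many times, whereas an instant query evaluates the expression once at the dashboard's end time. The generated dashboard JSON later in this diff shows what one of these targets compiles to:

    {
      "expr": "ceph_pool_metadata",
      "format": "table",
      "instant": true,
      "intervalFactor": 1,
      "legendFormat": "I",
      "refId": "I"
    }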
PoolOverviewGraphPanel(
@@ -308,7 +322,6 @@ local u = import 'utils.libsonnet';
'short',
'IOPS',
'topk($topk,round((rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),1) * on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
'time_series',
'{{name}} ',
0,
9,
@@ -318,8 +331,6 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'topk($topk,rate(ceph_pool_wr[30s]) + on(pool_id) group_left(instance,name) ceph_pool_metadata) ',
1,
'time_series',
'{{name}} - write'
)
),
@@ -329,7 +340,6 @@ local u = import 'utils.libsonnet';
'Bps',
'Throughput',
'topk($topk,(rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) * on(pool_id) group_left(instance,name) ceph_pool_metadata)',
'time_series',
'{{name}}',
12,
9,
@@ -342,7 +352,6 @@ local u = import 'utils.libsonnet';
'bytes',
'Capacity Used',
'ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata',
'',
'{{name}}',
0,
17,
@@ -377,7 +386,7 @@ local u = import 'utils.libsonnet';
gaugeShow,
sparkLineShow,
thresholds)
.addTarget(u.addTargetSchema(expr, 1, targetFormat, '')) + { gridPos: { x: x, y: y, w: w, h: h } };
.addTarget(u.addTargetSchema(expr, '', targetFormat)) + { gridPos: { x: x, y: y, w: w, h: h } };
local PoolDetailGraphPanel(alias,
title,
@@ -385,7 +394,6 @@ local u = import 'utils.libsonnet';
formatY1,
labelY1,
expr,
targetFormat,
legendFormat,
x,
y,
@@ -404,7 +412,7 @@ local u = import 'utils.libsonnet';
1,
'$datasource')
.addTargets(
[u.addTargetSchema(expr, 1, 'time_series', legendFormat)]
[u.addTargetSchema(expr, legendFormat)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -503,7 +511,6 @@ local u = import 'utils.libsonnet';
'ops',
'Objects out(-) / in(+) ',
'deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
'time_series',
'Objects per second',
12,
0,
@@ -514,12 +521,12 @@ local u = import 'utils.libsonnet';
{
read_op_per_sec: '#3F6833',
write_op_per_sec: '#E5AC0E',
}, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'time_series', 'reads', 0, 7, 12, 7
}, '$pool_name Client IOPS', '', 'iops', 'Read (-) / Write (+)', 'irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'reads', 0, 7, 12, 7
)
.addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
.addTarget(
u.addTargetSchema(
'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 1, 'time_series', 'writes'
'irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}', 'writes'
)
),
PoolDetailGraphPanel(
@@ -532,7 +539,6 @@ local u = import 'utils.libsonnet';
'Bps',
'Read (-) / Write (+)',
'irate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
'time_series',
'reads',
12,
7,
@@ -543,8 +549,6 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'irate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
1,
'time_series',
'writes'
)
),
@@ -558,7 +562,6 @@ local u = import 'utils.libsonnet';
'short',
'Objects',
'ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}',
'time_series',
'Number of Objects',
0,
14,

@@ -20,10 +20,8 @@ local u = import 'utils.libsonnet';
.addTargets(
[
u.addTargetSchema(expr1,
1,
'time_series',
'{{pool}} Write'),
u.addTargetSchema(expr2, 1, 'time_series', '{{pool}} Read'),
u.addTargetSchema(expr2, '{{pool}} Read'),
]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -151,12 +149,8 @@ local u = import 'utils.libsonnet';
.addTargets(
[
u.addTargetSchema(expr1,
1,
'time_series',
legendFormat1),
u.addTargetSchema(expr2,
1,
'time_series',
legendFormat2),
]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -257,9 +251,10 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))',
1,
'',
'table',
''
1,
true
)
) + { gridPos: { x: 0, y: 7, w: 8, h: 7 } },
u.addTableSchema(
@@ -278,9 +273,10 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))',
1,
'',
'table',
''
1,
true
)
) + { gridPos: { x: 8, y: 7, w: 8, h: 7 } },
u.addTableSchema(
@@ -299,9 +295,10 @@ local u = import 'utils.libsonnet';
.addTarget(
u.addTargetSchema(
'topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)',
1,
'',
'table',
''
1,
true
)
) + { gridPos: { x: 16, y: 7, w: 8, h: 7 } },
]),

@@ -19,8 +19,6 @@ local u = import 'utils.libsonnet';
'$datasource')
.addTargets(
[u.addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric,
1,
'time_series',
'{{source_zone}}')]
) + { gridPos: { x: x, y: y, w: w, h: h } };
@@ -144,7 +142,7 @@ local u = import 'utils.libsonnet';
legend_values
)
.addTargets(
[u.addTargetSchema(expr1, 1, 'time_series', legendFormat1)]
[u.addTargetSchema(expr1, legendFormat1)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -242,8 +240,6 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata',
1,
'time_series',
'PUT AVG'
),
]
@@ -285,8 +281,6 @@ local u = import 'utils.libsonnet';
6
).addTargets(
[u.addTargetSchema('sum(rate(ceph_rgw_put_b[30s]))',
1,
'time_series',
'PUTs')]
),
RgwOverviewPanel(
@@ -336,7 +330,7 @@ local u = import 'utils.libsonnet';
true
)
.addTargets(
[u.addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 1, 'time_series', 'Backend {{ code }}')]
[u.addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 'Backend {{ code }}')]
)
.addSeriesOverride([
{
@@ -371,12 +365,12 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[
u.addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Response errors'),
u.addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Requests errors'),
u.addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Backend redispatch'),
u.addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Backend retry'),
u.addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Request denied'),
u.addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 2, 'time_series', 'Backend Queued'),
u.addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Response errors', 'time_series', 2),
u.addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Requests errors'),
u.addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend redispatch', 'time_series', 2),
u.addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Backend retry', 'time_series', 2),
u.addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Request denied', 'time_series', 2),
u.addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 'Backend Queued', 'time_series', 2),
]
)
.addSeriesOverride([
@@ -410,8 +404,8 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[
u.addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back'),
u.addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back errors'),
u.addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back'),
u.addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 'Back errors'),
]
)
.addSeriesOverride([
@@ -441,9 +435,9 @@ local u = import 'utils.libsonnet';
)
.addTargets(
[
u.addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Front'),
u.addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'IN Back'),
u.addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Back'),
u.addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Front', 'time_series', 2),
u.addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'IN Back', 'time_series', 2),
u.addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'OUT Back', 'time_series', 2),
]
)
.addSeriesOverride([
@@ -480,7 +474,7 @@ local u = import 'utils.libsonnet';
1,
'$datasource')
.addTargets(
[u.addTargetSchema(expr1, 1, 'time_series', legendFormat1), u.addTargetSchema(expr2, 1, 'time_series', legendFormat2)]
[u.addTargetSchema(expr1, legendFormat1), u.addTargetSchema(expr2, legendFormat2)]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
@@ -593,14 +587,10 @@ local u = import 'utils.libsonnet';
[
u.addTargetSchema(
'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'PUTs {{ceph_daemon}}'
),
u.addTargetSchema(
'(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'Other {{ceph_daemon}}'
),
]
@@ -616,26 +606,18 @@ local u = import 'utils.libsonnet';
)
.addTarget(u.addTargetSchema(
'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'Failures {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'GETs {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'PUTs {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
'(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
1,
'time_series',
'Other (DELETE,LIST) {{ceph_daemon}}'
)) + { gridPos: { x: 20, y: 1, w: 4, h: 8 } },
]),

@@ -58,11 +58,12 @@ local g = import 'grafonnet/grafana.libsonnet';
legend_values=legend_values),
addTargetSchema(expr, intervalFactor, format, legendFormat)::
addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null)::
g.prometheus.target(expr=expr,
intervalFactor=intervalFactor,
legendFormat=legendFormat,
format=format,
legendFormat=legendFormat),
intervalFactor=intervalFactor,
instant=instant),
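
Because Jsonnet supports default and named parameters, the reordered signature lets the dominant two-argument call read naturally while forwarding the new instant flag straight through to Grafonnet's g.prometheus.target. The reordering is also why every positional call site in this diff had to be touched; under the new defaults, the following two calls are equivalent (a sketch, not a line from the diff):

    u.addTargetSchema(expr)
    u.addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null)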
addTemplateSchema(name,
datasource,

@@ -106,6 +106,7 @@
{
"expr": "count(sum by (hostname) (ceph_osd_metadata))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -188,6 +189,7 @@
{
"expr": "avg(\n 1 - (\n avg by(instance) \n (irate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or\n irate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]))\n )\n )",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -270,6 +272,7 @@
{
"expr": "avg (((node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})- (\n (node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) + \n (node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"})\n )) /\n (node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"} ))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -352,6 +355,7 @@
{
"expr": "sum ((irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -434,6 +438,7 @@
{
"expr": "avg (\n label_replace((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) *\n on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation_human{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\")\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -516,6 +521,7 @@
{
"expr": "sum (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n )\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"

@@ -224,6 +224,7 @@
{
"expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -410,6 +411,7 @@
{
"expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -476,14 +478,14 @@
{
"expr": "count(ceph_bluefs_wal_total_bytes)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "bluestore",
"refId": "A"
},
{
"expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "filestore",
"refId": "B"
}
@@ -514,63 +516,63 @@
{
"expr": "count(ceph_osd_stat_bytes < 1099511627776)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<1TB",
"refId": "A"
},
{
"expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<2TB",
"refId": "B"
},
{
"expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<3TB",
"refId": "C"
},
{
"expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<4TB",
"refId": "D"
},
{
"expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<6TB",
"refId": "E"
},
{
"expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<8TB",
"refId": "F"
},
{
"expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<10TB",
"refId": "G"
},
{
"expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<12TB",
"refId": "H"
},
{
"expr": "count(ceph_osd_stat_bytes >= 13194139533312)",
"format": "time_series",
"intervalFactor": 1,
"intervalFactor": 2,
"legendFormat": "<12TB+",
"refId": "I"
}
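
The pie-chart targets above also move from intervalFactor 1 to 2. The diff does not say why; assuming Grafana's usual Prometheus-datasource behavior (where intervalFactor is the 1/N Resolution setting that multiplies the query step), the effect is roughly:

    // Assumed Grafana behavior, for illustration only (not from this diff):
    // step = max(panel_interval, min_step) * intervalFactor
    // so intervalFactor: 2 halves the sampling resolution of these counts.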
@@ -623,6 +625,7 @@
{
"expr": "ceph_osd_numpg\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "PGs per OSD",
"refId": "A"

@@ -87,6 +87,7 @@
{
"expr": "count(ceph_pool_metadata)",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -333,6 +334,7 @@
{
"expr": "sum(ceph_pool_bytes_used)",
"format": "",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -415,6 +417,7 @@
{
"expr": "sum(ceph_pool_stored)",
"format": "",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -1052,6 +1055,7 @@
{
"expr": "(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5)",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "A",
"refId": "A"
@@ -1059,6 +1063,7 @@
{
"expr": "ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "B",
"refId": "B"
@@ -1066,6 +1071,7 @@
{
"expr": "((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "C",
"refId": "C"
@@ -1073,6 +1079,7 @@
{
"expr": "(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "D",
"refId": "D"
@@ -1080,6 +1087,7 @@
{
"expr": "(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "E",
"refId": "E"
@@ -1087,6 +1095,7 @@
{
"expr": "delta(ceph_pool_stored[5d])",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "F",
"refId": "F"
@@ -1094,6 +1103,7 @@
{
"expr": "rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "G",
"refId": "G"
@@ -1101,6 +1111,7 @@
{
"expr": "rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "H",
"refId": "H"
@@ -1108,6 +1119,7 @@
{
"expr": "ceph_pool_metadata",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "I",
"refId": "I"
@@ -1115,6 +1127,7 @@
{
"expr": "ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "J",
"refId": "J"
@@ -1122,6 +1135,7 @@
{
"expr": "ceph_pool_metadata{compression_mode!=\"none\"}",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "K",
"refId": "K"

@@ -418,6 +418,7 @@
{
"expr": "topk(10, (sort((irate(ceph_rbd_write_ops[30s]) + on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])))))",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -519,6 +520,7 @@
{
"expr": "topk(10, sort(sum(irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])) by (pool, image, namespace)))",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
@@ -620,6 +622,7 @@
{
"expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"