Add thresholds for memory alerts
Signed-off-by: Vitaly Zhuravlev <v-zhuravlev@users.noreply.github.com>
This commit is contained in:
parent
2111e70ac7
commit
77ae769179
|
@ -344,7 +344,7 @@
|
||||||
{
|
{
|
||||||
alert: 'NodeMemoryMajorPagesFaults',
|
alert: 'NodeMemoryMajorPagesFaults',
|
||||||
expr: |||
|
expr: |||
|
||||||
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > 500
|
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > %(memoryMajorPagesFaultsWarningThreshold)s
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '15m',
|
'for': '15m',
|
||||||
labels: {
|
labels: {
|
||||||
|
@ -353,15 +353,15 @@
|
||||||
annotations: {
|
annotations: {
|
||||||
summary: 'Memory major page faults are occurring at very high rate.',
|
summary: 'Memory major page faults are occurring at very high rate.',
|
||||||
description: |||
|
description: |||
|
||||||
Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
Memory major pages are occurring at very high rate at {{ $labels.instance }}, %(memoryMajorPagesFaultsWarningThreshold)s major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||||
Please check that there is enough memory available at this instance.
|
Please check that there is enough memory available at this instance.
|
||||||
|||,
|
||| % $._config,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
alert: 'NodeMemoryHighUtilization',
|
alert: 'NodeMemoryHighUtilization',
|
||||||
expr: |||
|
expr: |||
|
||||||
100 - (node_memory_MemAvailable_bytes{%(nodeExporterSelector)s} / node_memory_MemTotal_bytes{%(nodeExporterSelector)s} * 100) > 90
|
100 - (node_memory_MemAvailable_bytes{%(nodeExporterSelector)s} / node_memory_MemTotal_bytes{%(nodeExporterSelector)s} * 100) > %(memoryHighUtilizationThreshold)s
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '15m',
|
'for': '15m',
|
||||||
labels: {
|
labels: {
|
||||||
|
@ -370,7 +370,7 @@
|
||||||
annotations: {
|
annotations: {
|
||||||
summary: 'Host is running out of memory.',
|
summary: 'Host is running out of memory.',
|
||||||
description: |||
|
description: |||
|
||||||
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
Memory is filling up at {{ $labels.instance }}, has been above %(memoryHighUtilizationThreshold)s% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
||||||
|||,
|
|||,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
|
@ -60,6 +60,14 @@
|
||||||
fsSpaceAvailableWarningThreshold: 5,
|
fsSpaceAvailableWarningThreshold: 5,
|
||||||
fsSpaceAvailableCriticalThreshold: 3,
|
fsSpaceAvailableCriticalThreshold: 3,
|
||||||
|
|
||||||
|
// Memory utilization (%) level on which to trigger the
|
||||||
|
// 'NodeMemoryHighUtilization' alert.
|
||||||
|
memoryHighUtilizationThreshold: 90,
|
||||||
|
|
||||||
|
// Threshold for the rate of memory major page faults to trigger
|
||||||
|
// 'NodeMemoryMajorPagesFaults' alert.
|
||||||
|
memoryMajorPagesFaultsWarningThreshold: 500,
|
||||||
|
|
||||||
rateInterval: '5m',
|
rateInterval: '5m',
|
||||||
// Opt-in for multi-cluster support.
|
// Opt-in for multi-cluster support.
|
||||||
showMultiCluster: false,
|
showMultiCluster: false,
|
||||||
|
|
Loading…
Reference in New Issue