mirror of
https://github.com/prometheus-community/postgres_exporter
synced 2025-04-20 14:05:24 +00:00
Expanded description (what happened, where?) can be seen in 'description' annotation instead, while summary is better used as Human readable alert name. See also https://monitoring.mixins.dev/#guidelines-for-alert-names-labels-and-annotations Signed-off-by: Vitaly Zhuravlev <zhuravlev.vitaly@gmail.com>
128 lines
5.2 KiB
Plaintext
128 lines
5.2 KiB
Plaintext
{
|
|
prometheusAlerts+:: {
|
|
groups+: [
|
|
{
|
|
name: 'PostgreSQL',
|
|
rules: [
|
|
{
  // Fires when, per instance, the summed pg_stat_activity_count has been at or
  // above (max_connections - superuser_reserved_connections) for one minute.
  alert: 'PostgreSQLMaxConnectionsReached',
  annotations: {
    // {{ $value }} is the left-hand side of the comparison below, i.e. a
    // connection count, so it carries no unit suffix.
    description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).',
    summary: 'Postgres has maxed out its connections.',
  },
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  // PromQL arithmetic binds tighter than comparison, so this reads as
  // count >= (max_connections - reserved).
  expr: |||
    sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
    >=
    sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
    -
    sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
  ||| % $._config,
  'for': '1m',
  labels: {
    severity: 'warning',
  },
},
|
|
{
  // Early-warning companion to the max-connections alert: fires when, per
  // instance, the summed pg_stat_activity_count has stayed above 80% of
  // (max_connections - superuser_reserved_connections) for ten minutes.
  alert: 'PostgreSQLHighConnections',
  annotations: {
    // {{ $value }} is the connection count (left-hand side of the comparison),
    // so it is rendered without a unit suffix.
    description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is most likely).',
    summary: 'Postgres is over 80% of max connections.',
  },
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  expr: |||
    sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
    >
    (
      sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
      -
      sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
    ) * 0.8
  ||| % $._config,
  'for': '10m',
  labels: {
    severity: 'warning',
  },
},
|
|
{
  // Fires when pg_up has reported a value other than 1 for one minute.
  // Note that the expression only matches series that exist: if pg_up is not
  // being scraped at all, this rule yields no result and does not fire.
  alert: 'PostgreSQLDown',
  annotations: {
    // NOTE(review): the mention of DNS requests looks carried over from an
    // unrelated alert - confirm the intended wording with the mixin owners.
    description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be affected provided at least 1 node is still alive.',
    summary: 'Postgres is not processing queries.',
  },
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config,
  'for': '1m',
  labels: {
    severity: 'warning',
  },
},
|
|
{
  // Fires when the averaged 2m rate of pg_stat_activity_max_tx_duration for a
  // non-template database has stayed above 2 * 60 for two minutes.
  alert: 'PostgreSQLSlowQueries',
  annotations: {
    description: 'PostgreSQL has high number of slow queries on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
    summary: 'Postgres has high number of slow queries.',
  },
  // `cluster` is kept in the aggregation so that {{ $labels.cluster }} in the
  // description is populated; aggregating by datname alone drops the label and
  // the template renders an empty string. Series without a cluster label are
  // grouped exactly as before.
  // NOTE(review): pg_stat_activity_max_tx_duration reads like a gauge (a
  // duration), in which case rate() over it is questionable - confirm whether
  // the raw value or avg_over_time() was intended.
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  expr: |||
    avg by (datname, cluster) (
      rate (
        pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m]
      )
    ) > 2 * 60
  ||| % $._config,
  'for': '2m',
  labels: {
    severity: 'warning',
  },
},
|
|
{
  // Fires when the averaged transaction throughput (commits plus rollbacks,
  // each taken as an irate over a 5m window) for a non-template database has
  // stayed above 10000 for five minutes.
  alert: 'PostgreSQLQPS',
  annotations: {
    description: 'PostgreSQL has high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
    summary: 'Postgres has high number of queries per second.',
  },
  // `cluster` is kept in the aggregation so that {{ $labels.cluster }} in the
  // description is populated; aggregating by datname alone drops the label and
  // the template renders an empty string. Series without a cluster label are
  // grouped exactly as before.
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  expr: |||
    avg by (datname, cluster) (
      irate(
        pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
      )
      +
      irate(
        pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m]
      )
    ) > 10000
  ||| % $._config,
  'for': '5m',
  labels: {
    severity: 'warning',
  },
},
|
|
{
  // Fires when the averaged buffer cache hit ratio, computed as
  // rate(blks_hit) / (rate(blks_hit) + rate(blks_read)) over 5m, for a
  // non-template database has stayed below 0.98 for five minutes.
  alert: 'PostgreSQLCacheHitRatio',
  annotations: {
    description: 'PostgreSQL has low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
    summary: 'Postgres has low cache hit rate.',
  },
  // `cluster` is kept in the aggregation so that {{ $labels.cluster }} in the
  // description is populated; aggregating by datname alone drops the label and
  // the template renders an empty string. Series without a cluster label are
  // grouped exactly as before.
  // The selector placeholder is filled in from $._config.postgresExporterSelector.
  expr: |||
    avg by (datname, cluster) (
      rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m])
      /
      (
        rate(
          pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
        )
        +
        rate(
          pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m]
        )
      )
    ) < 0.98
  ||| % $._config,
  'for': '5m',
  labels: {
    severity: 'warning',
  },
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
}
|