mirror of
https://github.com/prometheus-community/postgres_exporter
synced 2025-04-01 22:48:15 +00:00
Make the example queries.yaml `pg_stat_statements` query safer. * Select the top 10% of queries by total query time. * Only expose the top 100 queries by total query time. * Keep only the most useful metrics. * Comment out the example by default. Fixes: https://github.com/prometheus-community/postgres_exporter/issues/549 Signed-off-by: SuperQ <superq@gmail.com>
245 lines
8.4 KiB
YAML
245 lines
8.4 KiB
YAML
# Replication lag as seen from this server.
# The query yields 0 when pg_is_in_recovery() is false; otherwise it yields the
# number of seconds between now() and pg_last_xact_replay_timestamp(), clamped
# to a minimum of 0 by GREATEST.
pg_replication:
  query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
  master: true
  metrics:
    - lag:
        usage: "GAUGE"
        description: "Replication lag behind master in seconds"
# Postmaster start time.
# Selects the single value returned by pg_postmaster_start_time() and exposes
# it under the column name start_time_seconds.
pg_postmaster:
  query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
  master: true
  metrics:
    - start_time_seconds:
        usage: "GAUGE"
        description: "Time at which postmaster started"
# Per-table activity statistics read from pg_stat_user_tables.
# Rows are labelled by current database, schema and table name.
# The four last_* timestamp columns are passed through COALESCE with
# '1970-01-01Z', so a NULL timestamp is reported as the epoch rather than NULL.
pg_stat_user_tables:
  query: |
    SELECT
      current_database() datname,
      schemaname,
      relname,
      seq_scan,
      seq_tup_read,
      idx_scan,
      idx_tup_fetch,
      n_tup_ins,
      n_tup_upd,
      n_tup_del,
      n_tup_hot_upd,
      n_live_tup,
      n_dead_tup,
      n_mod_since_analyze,
      COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
      COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
      COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
      COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
      vacuum_count,
      autovacuum_count,
      analyze_count,
      autoanalyze_count
    FROM
      pg_stat_user_tables
  metrics:
    # --- labels ---
    - datname:
        usage: "LABEL"
        description: "Name of current database"
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    # --- scan counters ---
    - seq_scan:
        usage: "COUNTER"
        description: "Number of sequential scans initiated on this table"
    - seq_tup_read:
        usage: "COUNTER"
        description: "Number of live rows fetched by sequential scans"
    - idx_scan:
        usage: "COUNTER"
        description: "Number of index scans initiated on this table"
    - idx_tup_fetch:
        usage: "COUNTER"
        description: "Number of live rows fetched by index scans"
    # --- row modification counters ---
    - n_tup_ins:
        usage: "COUNTER"
        description: "Number of rows inserted"
    - n_tup_upd:
        usage: "COUNTER"
        description: "Number of rows updated"
    - n_tup_del:
        usage: "COUNTER"
        description: "Number of rows deleted"
    - n_tup_hot_upd:
        usage: "COUNTER"
        description: "Number of rows HOT updated (i.e., with no separate index update required)"
    # --- row estimates ---
    - n_live_tup:
        usage: "GAUGE"
        description: "Estimated number of live rows"
    - n_dead_tup:
        usage: "GAUGE"
        description: "Estimated number of dead rows"
    - n_mod_since_analyze:
        usage: "GAUGE"
        description: "Estimated number of rows changed since last analyze"
    # --- maintenance timestamps (epoch when never run, see COALESCE above) ---
    - last_vacuum:
        usage: "GAUGE"
        description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
    - last_autovacuum:
        usage: "GAUGE"
        description: "Last time at which this table was vacuumed by the autovacuum daemon"
    - last_analyze:
        usage: "GAUGE"
        description: "Last time at which this table was manually analyzed"
    - last_autoanalyze:
        usage: "GAUGE"
        description: "Last time at which this table was analyzed by the autovacuum daemon"
    # --- maintenance counters ---
    - vacuum_count:
        usage: "COUNTER"
        description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
    - autovacuum_count:
        usage: "COUNTER"
        description: "Number of times this table has been vacuumed by the autovacuum daemon"
    - analyze_count:
        usage: "COUNTER"
        description: "Number of times this table has been manually analyzed"
    - autoanalyze_count:
        usage: "COUNTER"
        description: "Number of times this table has been analyzed by the autovacuum daemon"
# Per-table block I/O statistics read from pg_statio_user_tables.
# Rows are labelled by current database, schema and table name; every other
# column is exported as a counter of blocks read or buffer hits.
pg_statio_user_tables:
  query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
  metrics:
    # --- labels ---
    - datname:
        usage: "LABEL"
        description: "Name of current database"
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    # --- heap ---
    - heap_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table"
    - heap_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table"
    # --- indexes ---
    - idx_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from all indexes on this table"
    - idx_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in all indexes on this table"
    # --- TOAST table ---
    - toast_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table's TOAST table (if any)"
    - toast_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table's TOAST table (if any)"
    # --- TOAST table indexes ---
    - tidx_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
    - tidx_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table's TOAST table indexes (if any)"
#
# WARNING:
#   This set of metrics can be very expensive on a busy server as every
#   unique query executed will create an additional time series.
#
# The example below is disabled by default; uncomment it to enable it.
# To bound the number of series it:
#   * keeps only statements whose total_time is above the 90th percentile,
#     i.e. the top 10% of statements by total time, and
#   * exposes at most the 100 statements with the highest total time.
#
# NOTE: on PostgreSQL 13 and later the pg_stat_statements column total_time
# is named total_exec_time; adjust the three references below accordingly.
#
# pg_stat_statements:
#   query: |
#     SELECT
#       pg_get_userbyid(userid) as user,
#       pg_database.datname,
#       pg_stat_statements.queryid,
#       pg_stat_statements.calls as calls_total,
#       pg_stat_statements.total_time / 1000.0 as seconds_total,
#       pg_stat_statements.rows as rows_total,
#       pg_stat_statements.blk_read_time / 1000.0 as block_read_seconds_total,
#       pg_stat_statements.blk_write_time / 1000.0 as block_write_seconds_total
#     FROM pg_stat_statements
#     JOIN pg_database
#       ON pg_database.oid = pg_stat_statements.dbid
#     WHERE
#       total_time > (
#         SELECT percentile_cont(0.9)
#           WITHIN GROUP (ORDER BY total_time)
#         FROM pg_stat_statements
#       )
#     ORDER BY seconds_total DESC
#     LIMIT 100
#   metrics:
#     - user:
#         usage: "LABEL"
#         description: "The user who executed the statement"
#     - datname:
#         usage: "LABEL"
#         description: "The database in which the statement was executed"
#     - queryid:
#         usage: "LABEL"
#         description: "Internal hash code, computed from the statement's parse tree"
#     - calls_total:
#         usage: "COUNTER"
#         description: "Number of times executed"
#     - seconds_total:
#         usage: "COUNTER"
#         description: "Total time spent in the statement, in seconds"
#     - rows_total:
#         usage: "COUNTER"
#         description: "Total number of rows retrieved or affected by the statement"
#     - block_read_seconds_total:
#         usage: "COUNTER"
#         description: "Total time the statement spent reading blocks, in seconds"
#     - block_write_seconds_total:
#         usage: "COUNTER"
#         description: "Total time the statement spent writing blocks, in seconds"
# Histogram of how long idle backends have been idle, per application_name.
#
# The query is built from two CTEs over pg_stat_activity:
#   * metrics - per application_name, the sum of (CURRENT_TIMESTAMP - state_change)
#               in seconds and the number of rows, restricted to state = 'idle'.
#   * buckets - per application_name and upper bound `le`, the cumulative number
#               of rows whose idle duration is <= le.
#
# Both CTEs filter on state = 'idle'. Without that filter in `buckets`, backends
# in other states would be counted in the buckets but not in seconds_count /
# seconds_sum, making the histogram inconsistent.
#
# The final SELECT aggregates `le` and `bucket` into arrays. Both aggregates use
# an explicit ORDER BY le so that element i of `seconds` always corresponds to
# element i of `seconds_bucket`, independent of the row order produced by the join.
pg_process_idle:
  query: |
    WITH
      metrics AS (
        SELECT
          application_name,
          SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
          COUNT(*) AS process_idle_seconds_count
        FROM pg_stat_activity
        WHERE state = 'idle'
        GROUP BY application_name
      ),
      buckets AS (
        SELECT
          application_name,
          le,
          SUM(
            CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
              THEN 1
              ELSE 0
            END
          )::bigint AS bucket
        FROM
          pg_stat_activity
          CROSS JOIN UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
        WHERE state = 'idle'
        GROUP BY application_name, le
      )
    SELECT
      application_name,
      process_idle_seconds_sum AS seconds_sum,
      process_idle_seconds_count AS seconds_count,
      ARRAY_AGG(le ORDER BY le) AS seconds,
      ARRAY_AGG(bucket ORDER BY le) AS seconds_bucket
    FROM metrics
    INNER JOIN buckets USING (application_name)
    GROUP BY application_name, process_idle_seconds_sum, process_idle_seconds_count
  metrics:
    - application_name:
        usage: "LABEL"
        description: "Application Name"
    - seconds:
        usage: "HISTOGRAM"
        description: "Idle time of server processes"