Reduce cardinality of pg_stat_statements

Make the example queries.yaml `pg_stat_statements` query safer.
* Select only queries whose total query time is above the 10th percentile (the query uses `percentile_cont(0.1)`, so the lowest 10% are dropped).
* Only expose the top 100 queries by total query time.
* Keep only the most useful metrics.
* Comment out the example by default.

Fixes: https://github.com/prometheus-community/postgres_exporter/issues/549

Signed-off-by: SuperQ <superq@gmail.com>
This commit is contained in:
SuperQ 2023-03-06 09:34:22 +01:00
parent d273f97b72
commit e7f58a42e8
No known key found for this signature in database
GPG Key ID: C646B23C9E3245F1

View File

@@ -146,77 +146,58 @@ pg_statio_user_tables:
usage: "COUNTER" usage: "COUNTER"
description: "Number of buffer hits in this table's TOAST table indexes (if any)" description: "Number of buffer hits in this table's TOAST table indexes (if any)"
# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series #
pg_stat_statements: # WARNING:
query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin'" # This set of metrics can be very expensive on a busy server as every
master: true # unique query executed will create an additional time series
metrics: #
- rolname: # pg_stat_statements:
usage: "LABEL" # query: |
description: "Name of user" # SELECT
- datname: # pg_get_userbyid(userid) as user,
usage: "LABEL" # pg_database.datname,
description: "Name of database" # pg_stat_statements.queryid,
- queryid: # pg_stat_statements.calls as calls_total,
usage: "LABEL" # pg_stat_statements.total_time / 1000.0 as seconds_total,
description: "Query ID" # pg_stat_statements.rows as rows_total,
- calls: # pg_stat_statements.blk_read_time / 1000.0 as block_read_seconds_total,
usage: "COUNTER" # pg_stat_statements.blk_write_time / 1000.0 as block_write_seconds_total
description: "Number of times executed" # FROM pg_stat_statements
- total_time_seconds: # JOIN pg_database
usage: "COUNTER" # ON pg_database.oid = pg_stat_statements.dbid
description: "Total time spent in the statement, in milliseconds" # WHERE
- min_time_seconds: # total_time > (
usage: "GAUGE" # SELECT percentile_cont(0.1)
description: "Minimum time spent in the statement, in milliseconds" # WITHIN GROUP (ORDER BY total_time)
- max_time_seconds: # FROM pg_stat_statements
usage: "GAUGE" # )
description: "Maximum time spent in the statement, in milliseconds" # ORDER BY seconds_total DESC
- mean_time_seconds: # LIMIT 100
usage: "GAUGE" # metrics:
description: "Mean time spent in the statement, in milliseconds" # - user:
- stddev_time_seconds: # usage: "LABEL"
usage: "GAUGE" # description: "The user who executed the statement"
description: "Population standard deviation of time spent in the statement, in milliseconds" # - datname:
- rows: # usage: "LABEL"
usage: "COUNTER" # description: "The database in which the statement was executed"
description: "Total number of rows retrieved or affected by the statement" # - queryid:
- shared_blks_hit: # usage: "LABEL"
usage: "COUNTER" # description: "Internal hash code, computed from the statement's parse tree"
description: "Total number of shared block cache hits by the statement" # - calls_total:
- shared_blks_read: # usage: "COUNTER"
usage: "COUNTER" # description: "Number of times executed"
description: "Total number of shared blocks read by the statement" # - seconds_total:
- shared_blks_dirtied: # usage: "COUNTER"
usage: "COUNTER" # description: "Total time spent in the statement, in seconds"
description: "Total number of shared blocks dirtied by the statement" # - rows_total:
- shared_blks_written: # usage: "COUNTER"
usage: "COUNTER" # description: "Total number of rows retrieved or affected by the statement"
description: "Total number of shared blocks written by the statement" # - block_read_seconds_total:
- local_blks_hit: # usage: "COUNTER"
usage: "COUNTER" # description: "Total time the statement spent reading blocks, in seconds"
description: "Total number of local block cache hits by the statement" # - block_write_seconds_total:
- local_blks_read: # usage: "COUNTER"
usage: "COUNTER" # description: "Total time the statement spent writing blocks, in seconds"
description: "Total number of local blocks read by the statement"
- local_blks_dirtied:
usage: "COUNTER"
description: "Total number of local blocks dirtied by the statement"
- local_blks_written:
usage: "COUNTER"
description: "Total number of local blocks written by the statement"
- temp_blks_read:
usage: "COUNTER"
description: "Total number of temp blocks read by the statement"
- temp_blks_written:
usage: "COUNTER"
description: "Total number of temp blocks written by the statement"
- blk_read_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
- blk_write_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
pg_process_idle: pg_process_idle:
query: | query: |