pg_replication: query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag" master: true metrics: - lag: usage: "GAUGE" description: "Replication lag behind master in seconds" pg_postmaster: query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" master: true metrics: - start_time_seconds: usage: "GAUGE" description: "Time at which postmaster started" pg_stat_user_tables: query: | SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables metrics: - datname: usage: "LABEL" description: "Name of current database" - schemaname: usage: "LABEL" description: "Name of the schema that this table is in" - relname: usage: "LABEL" description: "Name of this table" - seq_scan: usage: "COUNTER" description: "Number of sequential scans initiated on this table" - seq_tup_read: usage: "COUNTER" description: "Number of live rows fetched by sequential scans" - idx_scan: usage: "COUNTER" description: "Number of index scans initiated on this table" - idx_tup_fetch: usage: "COUNTER" description: "Number of live rows fetched by index scans" - n_tup_ins: usage: "COUNTER" description: "Number of rows inserted" - n_tup_upd: usage: "COUNTER" description: "Number of rows updated" - n_tup_del: usage: "COUNTER" description: "Number of rows deleted" - n_tup_hot_upd: usage: "COUNTER" description: "Number of rows HOT updated (i.e., with no separate index update required)" - n_live_tup: usage: "GAUGE" description: "Estimated number of live rows" - n_dead_tup: usage: "GAUGE" description: "Estimated number of dead rows" - n_mod_since_analyze: usage: "GAUGE" description: "Estimated number of rows changed since last analyze" - last_vacuum: usage: "GAUGE" description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" - last_autovacuum: usage: "GAUGE" description: "Last time at which this table was vacuumed by the autovacuum daemon" - last_analyze: usage: "GAUGE" description: "Last time at which this table was manually analyzed" - last_autoanalyze: usage: "GAUGE" description: "Last time at which this table was analyzed by the autovacuum daemon" - vacuum_count: usage: "COUNTER" description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" - autovacuum_count: usage: "COUNTER" description: "Number of times this table has been vacuumed by the autovacuum daemon" - analyze_count: usage: "COUNTER" description: "Number of times this table has been manually analyzed" - autoanalyze_count: usage: "COUNTER" description: "Number of times this table has been analyzed by the autovacuum daemon" pg_statio_user_tables: query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables" metrics: - datname: usage: "LABEL" description: "Name of current database" - schemaname: usage: "LABEL" description: "Name of the schema that this table is in" - relname: usage: "LABEL" description: "Name of this table" - heap_blks_read: usage: "COUNTER" description: "Number of disk blocks read from this table" - heap_blks_hit: usage: "COUNTER" description: "Number of buffer hits in this table" - idx_blks_read: usage: "COUNTER" description: "Number of disk blocks read from all indexes on this table" - idx_blks_hit: usage: "COUNTER" description: "Number of buffer hits in all indexes on this table" - toast_blks_read: usage: "COUNTER" description: "Number of disk blocks read from this table's TOAST table (if any)" - toast_blks_hit: usage: "COUNTER" description: "Number of buffer hits in this table's TOAST table (if any)" - tidx_blks_read: usage: "COUNTER" description: "Number of disk blocks read from this table's TOAST table indexes (if any)" - tidx_blks_hit: usage: "COUNTER" description: "Number of buffer hits in this table's TOAST table indexes (if any)" # # WARNING: # This set of metrics can be very expensive on a busy server as every # unique query executed will create an additional time series # # pg_stat_statements: # query: | # SELECT # pg_get_userbyid(userid) as user, # pg_database.datname, # pg_stat_statements.queryid, # pg_stat_statements.calls as calls_total, # pg_stat_statements.total_time / 1000.0 as seconds_total, # pg_stat_statements.rows as rows_total, # pg_stat_statements.blk_read_time / 1000.0 as block_read_seconds_total, # pg_stat_statements.blk_write_time / 1000.0 as block_write_seconds_total # FROM pg_stat_statements # JOIN pg_database # ON pg_database.oid = pg_stat_statements.dbid # WHERE # total_time > ( # SELECT percentile_cont(0.1) # WITHIN GROUP (ORDER BY total_time) # FROM pg_stat_statements # ) # ORDER BY seconds_total DESC # LIMIT 100 # metrics: # - user: # usage: "LABEL" # description: "The user who executed the statement" # - datname: # usage: "LABEL" # description: "The database in which the statement was executed" # - queryid: # usage: "LABEL" # description: "Internal hash code, computed from the statement's parse tree" # - calls_total: # usage: "COUNTER" # description: "Number of times executed" # - seconds_total: # usage: "COUNTER" # description: "Total time spent in the statement, in seconds" # - rows_total: # usage: "COUNTER" # description: "Total number of rows retrieved or affected by the statement" # - block_read_seconds_total: # usage: "COUNTER" # description: "Total time the statement spent reading blocks, in seconds" # - block_write_seconds_total: # usage: "COUNTER" # description: "Total time the statement spent writing blocks, in seconds" pg_process_idle: query: | WITH metrics AS ( SELECT application_name, SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum, COUNT(*) AS process_idle_seconds_count FROM pg_stat_activity WHERE state = 'idle' GROUP BY application_name ), buckets AS ( SELECT application_name, le, SUM( CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le THEN 1 ELSE 0 END )::bigint AS bucket FROM pg_stat_activity, UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le GROUP BY application_name, le ORDER BY application_name, le ) SELECT application_name, process_idle_seconds_sum as seconds_sum, process_idle_seconds_count as seconds_count, ARRAY_AGG(le) AS seconds, ARRAY_AGG(bucket) AS seconds_bucket FROM metrics JOIN buckets USING (application_name) GROUP BY 1, 2, 3 metrics: - application_name: usage: "LABEL" description: "Application Name" - seconds: usage: "HISTOGRAM" description: "Idle time of server processes"