mirror of
https://github.com/prometheus-community/postgres_exporter
synced 2025-04-26 04:58:05 +00:00
fix master database behavior
parent 9b13f5ec57
commit deac1c37db
@@ -166,7 +166,7 @@ flag. This removes all built-in metrics, and uses only metrics defined by querie
 
 ### Automatically discover databases
 To scrape metrics from all databases on a database server, the database DSN's can be dynamically discovered via the
-`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false` is run for all configured DSN's. From the
+`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false and datname != current_database()` is run for all configured DSN's. From the
 result a new set of DSN's is created for which the metrics are scraped.
 
 In addition, the option `--exclude-databases` adds the possibility to filter the result from the auto discovery to discard databases you do not need.
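For reviewers who want to see what the discovery step amounts to, here is a minimal self-contained sketch in Go. It is an illustration only: the `github.com/lib/pq` driver, the plain `database/sql` plumbing, and the placeholder DSN are assumptions of this sketch, and the per-database DSN rewriting that the exporter performs afterwards is omitted.

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // Postgres driver; an assumption of this sketch
)

func main() {
	// Connect to one configured DSN (placeholder credentials).
	db, err := sql.Open("postgres", "postgres://user:pass@localhost:5432/postgres?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The discovery query from this commit: connectable, non-template
	// databases, excluding the one we are already connected to.
	rows, err := db.Query(`SELECT datname FROM pg_database
		WHERE datallowconn = true
		AND datistemplate = false
		AND datname != current_database()`)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	// Each returned name would become a new DSN to scrape.
	for rows.Next() {
		var datname string
		if err := rows.Scan(&datname); err != nil {
			log.Fatal(err)
		}
		fmt.Println("discovered database:", datname)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}
}
```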
@@ -100,6 +100,7 @@ type Mapping map[string]MappingOptions
 type UserQuery struct {
 	Query        string    `yaml:"query"`
 	Metrics      []Mapping `yaml:"metrics"`
+	Master       bool      `yaml:"master"`        // Querying only for master database
 	CacheSeconds uint64    `yaml:"cache_seconds"` // Number of seconds to cache the namespace result metrics for.
 }
 
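The new `master` tag can be exercised in isolation. The following standalone sketch unmarshals a queries.yaml-style entry into a trimmed-down copy of the struct; the `gopkg.in/yaml.v2` import and the reduced struct are assumptions of the sketch, not the exporter's full code:

```go
package main

import (
	"fmt"
	"log"

	yaml "gopkg.in/yaml.v2"
)

// A trimmed-down UserQuery carrying only the fields relevant here;
// the yaml tags match those in the diff above.
type UserQuery struct {
	Query        string `yaml:"query"`
	Master       bool   `yaml:"master"`
	CacheSeconds uint64 `yaml:"cache_seconds"`
}

func main() {
	src := []byte(`
query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) as lag"
master: true
`)
	var q UserQuery
	if err := yaml.Unmarshal(src, &q); err != nil {
		log.Fatal(err)
	}
	// Master == true means this namespace is queried only on the master DSN.
	fmt.Printf("master=%v cache_seconds=%d\n", q.Master, q.CacheSeconds)
}
```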
@@ -139,6 +140,7 @@ func (cm *ColumnMapping) UnmarshalYAML(unmarshal func(interface{}) error) error
 // This is mainly so we can parse cacheSeconds around.
 type intermediateMetricMap struct {
 	columnMappings map[string]ColumnMapping
+	master         bool
 	cacheSeconds   uint64
 }
 
@@ -146,6 +148,7 @@ type intermediateMetricMap struct {
 type MetricMapNamespace struct {
 	labels         []string             // Label names for this namespace
 	columnMappings map[string]MetricMap // Column mappings in this namespace
+	master         bool                 // Call query only for master database
 	cacheSeconds   uint64               // Number of seconds this metric namespace can be cached. 0 disables.
 }
 
@@ -211,6 +214,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
 			"blk_write_time": {COUNTER, "Time spent writing data file blocks by backends in this database, in milliseconds", nil, nil},
 			"stats_reset":    {COUNTER, "Time at which these statistics were last reset", nil, nil},
 		},
+		true,
 		0,
 	},
 	"pg_stat_database_conflicts": {
@@ -223,6 +227,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
 			"confl_bufferpin": {COUNTER, "Number of queries in this database that have been canceled due to pinned buffers", nil, nil},
 			"confl_deadlock":  {COUNTER, "Number of queries in this database that have been canceled due to deadlocks", nil, nil},
 		},
+		true,
 		0,
 	},
 	"pg_locks": {
@@ -231,6 +236,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
 			"mode":  {LABEL, "Type of Lock", nil, nil},
 			"count": {GAUGE, "Number of locks", nil, nil},
 		},
+		true,
 		0,
 	},
 	"pg_stat_replication": {
@@ -276,6 +282,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
 			"flush_lag":  {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level remote_flush incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")},
 			"replay_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")},
 		},
+		true,
 		0,
 	},
 	"pg_stat_activity": {
@@ -285,6 +292,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
 			"count":           {GAUGE, "number of connections in this state", nil, nil},
 			"max_tx_duration": {GAUGE, "max duration in seconds any active transaction has been running", nil, nil},
 		},
+		true,
 		0,
 	},
 }
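Note that the bare `true,` added to each built-in map literal above is positional: it initializes the new `master` field of `intermediateMetricMap` (declared between `columnMappings` and `cacheSeconds`), so every built-in namespace is now queried only against the master database.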
@@ -444,6 +452,7 @@ func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[str
 		newMetricMap := make(map[string]ColumnMapping)
 		metricMap = intermediateMetricMap{
 			columnMappings: newMetricMap,
+			master:         specs.Master,
 			cacheSeconds:   specs.CacheSeconds,
 		}
 		metricMaps[metric] = metricMap
@@ -614,7 +623,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri
 			}
 		}
 
-		metricMap[namespace] = MetricMapNamespace{variableLabels, thisMap, intermediateMappings.cacheSeconds}
+		metricMap[namespace] = MetricMapNamespace{variableLabels, thisMap, intermediateMappings.master, intermediateMappings.cacheSeconds}
 	}
 
 	return metricMap
@@ -857,7 +866,7 @@ func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool
 
 	var err error
 
-	if (!disableSettingsMetrics && !*autoDiscoverDatabases) || (!disableSettingsMetrics && *autoDiscoverDatabases && s.master) {
+	if !disableSettingsMetrics && s.master {
 		if err = querySettings(ch, s); err != nil {
 			err = fmt.Errorf("error retrieving settings: %s", err)
 		}
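This collapses the two settings-scrape conditions into one: `pg_settings` are gathered whenever the server is flagged as master, regardless of how that flag was set (see the `discoverDatabaseDSNs` and `scrapeDSN` hunks below).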
@@ -1257,6 +1266,12 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str
 
 	for namespace, mapping := range server.metricMap {
 		log.Debugln("Querying namespace: ", namespace)
+
+		if mapping.master && !server.master {
+			log.Debugln("Query skipped...")
+			continue
+		}
+
 		scrapeMetric := false
 		// Check if the metric is cached
 		server.cacheMtx.Lock()
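The new guard means a namespace flagged `master` runs only on the connection marked as the master database; on auto-discovered per-database connections it is skipped with a debug log rather than scraped once per database.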
@@ -1335,12 +1350,13 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server)
 		log.Infof("Semantic Version Changed on %q: %s -> %s", server, server.lastMapVersion, semanticVersion)
 		server.mappingMtx.Lock()
 
-		if e.disableDefaultMetrics || (!e.disableDefaultMetrics && e.autoDiscoverDatabases && !server.master) {
-			server.metricMap = make(map[string]MetricMapNamespace)
-			server.queryOverrides = make(map[string]string)
-		} else {
+		// Get Default Metrics only for master database
+		if !e.disableDefaultMetrics && server.master {
 			server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps)
 			server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides)
+		} else {
+			server.metricMap = make(map[string]MetricMapNamespace)
+			server.queryOverrides = make(map[string]string)
 		}
 
 		server.lastMapVersion = semanticVersion
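To check that the rewritten gate simplifies rather than changes behavior, here is a throwaway comparison (the function names are this sketch's own, not the exporter's) that enumerates every flag combination:

```go
package main

import "fmt"

// oldLoadsDefaults inverts the removed condition: true means the
// built-in metric maps are loaded.
func oldLoadsDefaults(disableDefaults, autoDiscover, master bool) bool {
	return !(disableDefaults || (!disableDefaults && autoDiscover && !master))
}

// newLoadsDefaults mirrors the condition introduced by this commit.
func newLoadsDefaults(disableDefaults, master bool) bool {
	return !disableDefaults && master
}

func main() {
	// Enumerate all flag combinations and compare the two gates.
	for _, disable := range []bool{false, true} {
		for _, auto := range []bool{false, true} {
			for _, master := range []bool{false, true} {
				fmt.Printf("disable=%v auto=%v master=%v old=%v new=%v\n",
					disable, auto, master,
					oldLoadsDefaults(disable, auto, master),
					newLoadsDefaults(disable, master))
			}
		}
	}
}
```

The outputs differ only for `auto=false, master=false`, a state this commit makes unreachable because `scrapeDSN` now marks every DSN as master when auto-discovery is off.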
@@ -1370,11 +1386,11 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server)
 		server.mappingMtx.Unlock()
 	}
 
-	// Output the version as a special metric
+	// Output the version as a special metric only for master database
 	versionDesc := prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, staticLabelName),
 		"Version string as reported by postgres", []string{"version", "short_version"}, server.labels)
 
-	if !e.disableDefaultMetrics && (server.master && e.autoDiscoverDatabases) {
+	if !e.disableDefaultMetrics && server.master {
 		ch <- prometheus.MustNewConstMetric(versionDesc,
 			prometheus.UntypedValue, 1, versionString, semanticVersion.String())
 	}
@@ -1439,6 +1455,7 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
 			continue
 		}
 
+		// If autoDiscoverDatabases is true, set first dsn as master database (Default: false)
 		server.master = true
 
 		databaseNames, err := queryDatabases(server)
@@ -1467,6 +1484,12 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
 
 func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error {
 	server, err := e.servers.GetServer(dsn)
 
+	// Check if autoDiscoverDatabases is false, set dsn as master database (Default: false)
+	if !e.autoDiscoverDatabases {
+		server.master = true
+	}
+
 	if err != nil {
 		return &ErrorConnectToServer{fmt.Sprintf("Error opening connection to database (%s): %s", loggableDSN(dsn), err.Error())}
 	}
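Taken together with the `discoverDatabaseDSNs` hunk, the master flag now follows a simple rule. A restatement as a self-contained sketch (the names here are illustrative, not the exporter's):

```go
package main

import "fmt"

// isMaster restates when a connection is treated as the master database
// after this commit (a summary, not the exporter's code). seedDSN means
// the DSN was configured explicitly rather than produced by discovery.
func isMaster(autoDiscoverDatabases, seedDSN bool) bool {
	if !autoDiscoverDatabases {
		// scrapeDSN: with discovery off, every configured DSN is master.
		return true
	}
	// discoverDatabaseDSNs: only the originally configured (seed) DSN is
	// master; the per-database DSNs derived from it are not.
	return seedDSN
}

func main() {
	fmt.Println(isMaster(false, false)) // true
	fmt.Println(isMaster(true, true))   // true
	fmt.Println(isMaster(true, false))  // false
}
```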
@@ -32,6 +32,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) {
 			"metric_which_stays":    {COUNTER, "This metric should not be eliminated", nil, nil},
 			"metric_which_discards": {COUNTER, "This metric should be forced to DISCARD", nil, nil},
 		},
+		true,
 		0,
 	},
 }
queries.yaml (27 changes)
@@ -1,5 +1,6 @@
 pg_replication:
   query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) as lag"
+  master: true
   metrics:
     - lag:
         usage: "GAUGE"
@@ -7,14 +8,18 @@ pg_replication:
 
 pg_postmaster:
   query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
+  master: true
   metrics:
     - start_time_seconds:
         usage: "GAUGE"
         description: "Time at which postmaster started"
 
 pg_stat_user_tables:
-  query: "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z'), COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
+  query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z'), COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
   metrics:
+    - datname:
+        usage: "LABEL"
+        description: "Name of current database"
     - schemaname:
         usage: "LABEL"
         description: "Name of the schema that this table is in"
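The `current_database() datname` column added to this query (and to `pg_statio_user_tables` below) surfaces as a `datname` label, so when auto-discovery scrapes the same namespace from several databases the resulting series stay distinguishable.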
@@ -80,8 +85,11 @@ pg_stat_user_tables:
         description: "Number of times this table has been analyzed by the autovacuum daemon"
 
 pg_statio_user_tables:
-  query: "SELECT schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
+  query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
   metrics:
+    - datname:
+        usage: "LABEL"
+        description: "Name of current database"
     - schemaname:
         usage: "LABEL"
         description: "Name of the schema that this table is in"
@@ -115,6 +123,7 @@ pg_statio_user_tables:
 
 pg_database:
   query: "SELECT pg_database.datname, pg_database_size(pg_database.datname) as size FROM pg_database"
+  master: true
   cache_seconds: 30
   metrics:
     - datname:
@@ -124,13 +133,19 @@ pg_database:
         usage: "GAUGE"
         description: "Disk space used by the database"
 
 
 pg_stat_statements:
-  query: "SELECT query, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements"
+  query: "SELECT t2.rolname, t3.datname, queryid, calls, total_time / 1000 as total_time_seconds, min_time / 1000 as min_time_seconds, max_time / 1000 as max_time_seconds, mean_time / 1000 as mean_time_seconds, stddev_time / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 join pg_roles t2 on (t1.userid=t2.oid) join pg_database t3 on (t1.dbid=t3.oid)"
+  master: true
   metrics:
-    - query:
+    - rolname:
         usage: "LABEL"
-        description: "Query class"
+        description: "Name of user"
+    - datname:
+        usage: "LABEL"
+        description: "Name of database"
+    - queryid:
+        usage: "LABEL"
+        description: "Query ID"
     - calls:
         usage: "COUNTER"
         description: "Number of times executed"
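Flagging `pg_database` and `pg_stat_statements` with `master: true` follows the same logic as the built-ins: both queries report cluster-wide information (every database, and, with the joins above, every role), so running them once on the master connection avoids duplicated series under auto-discovery.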