Merge branch 'master' into feature/test_postgres_12

Will Rouesnel 2021-01-31 22:23:07 +11:00 committed by GitHub
commit 178426c095
6 changed files with 383 additions and 40 deletions


@@ -140,6 +140,9 @@ The following environment variables configure the exporter:
* `PG_EXPORTER_EXCLUDE_DATABASES`
A comma-separated list of databases to remove when autoDiscoverDatabases is enabled. Default is an empty string.
* `PG_EXPORTER_METRIC_PREFIX`
A prefix to use for each of the default metrics exported by postgres-exporter. Default is `pg`.
Settings set by environment variables starting with `PG_` are overridden by the corresponding CLI flag, if given.
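For illustration only (not part of this change), here is a minimal sketch of how a kingpin flag like the one backing `PG_EXPORTER_METRIC_PREFIX` resolves its value: an explicit CLI flag wins, then the environment variable named via `Envar`, then the `Default`:

```go
package main

import (
	"fmt"

	"gopkg.in/alecthomas/kingpin.v2"
)

// Mirrors the exporter's flag definition; resolution order is
// --metric-prefix > PG_EXPORTER_METRIC_PREFIX > "pg".
var metricPrefix = kingpin.Flag("metric-prefix", "Prefix for the default metrics.").
	Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String()

func main() {
	kingpin.Parse()
	fmt.Println("metric prefix:", *metricPrefix)
}
```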
### Setting the Postgres server's data source name


@@ -53,6 +53,7 @@ var (
onlyDumpMaps = kingpin.Flag("dumpmaps", "Do not run, simply dump the maps.").Bool()
constantLabelsList = kingpin.Flag("constantLabels", "A list of label=value separated by comma(,).").Default("").Envar("PG_EXPORTER_CONSTANT_LABELS").String()
excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String()
metricPrefix = kingpin.Flag("metric-prefix", "A metric prefix can be used to have non-default (not \"pg\") prefixes for each of the metrics").Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String()
)
// Metric name parts.
@@ -80,6 +81,7 @@ const (
GAUGE ColumnUsage = iota // Use this column as a gauge
MAPPEDMETRIC ColumnUsage = iota // Use this column with the supplied mapping of text values
DURATION ColumnUsage = iota // This column should be interpreted as a text duration (and converted to milliseconds)
HISTOGRAM ColumnUsage = iota // Use this column as a histogram
)
// UnmarshalYAML implements the yaml.Unmarshaller interface.
@@ -169,6 +171,7 @@ type MetricMapNamespace struct {
// be mapped to by the collector
type MetricMap struct {
discard bool // Should metric be discarded during mapping?
histogram bool // Should metric be treated as a histogram?
vtype prometheus.ValueType // Prometheus valuetype
desc *prometheus.Desc // Prometheus descriptor
conversion func(interface{}) (float64, bool) // Conversion function to turn PG result into float64
@@ -376,7 +379,8 @@ var queryOverrides = map[string][]OverrideQuery{
('sharelock'),
('sharerowexclusivelock'),
('exclusivelock'),
('accessexclusivelock'),
('sireadlock')
) AS tmp(mode) CROSS JOIN pg_database
LEFT JOIN
(SELECT database, lower(mode) AS mode,count(*) AS count
@@ -598,6 +602,8 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri
for namespace, intermediateMappings := range metricMaps {
thisMap := make(map[string]MetricMap)
namespace = strings.Replace(namespace, "pg", *metricPrefix, 1)
// Get the constant labels
var variableLabels []string
for columnName, columnMapping := range intermediateMappings.columnMappings {
@@ -650,6 +656,27 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri
return dbToFloat64(in)
},
}
case HISTOGRAM:
thisMap[columnName] = MetricMap{
histogram: true,
vtype: prometheus.UntypedValue,
desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels),
conversion: func(in interface{}) (float64, bool) {
return dbToFloat64(in)
},
}
thisMap[columnName+"_bucket"] = MetricMap{
histogram: true,
discard: true,
}
thisMap[columnName+"_sum"] = MetricMap{
histogram: true,
discard: true,
}
thisMap[columnName+"_count"] = MetricMap{
histogram: true,
discard: true,
}
case MAPPEDMETRIC:
thisMap[columnName] = MetricMap{
vtype: prometheus.GaugeValue,
@@ -721,6 +748,9 @@ func stringToColumnUsage(s string) (ColumnUsage, error) {
case "GAUGE":
u = GAUGE
case "HISTOGRAM":
u = HISTOGRAM
case "MAPPEDMETRIC":
u = MAPPEDMETRIC
@@ -772,6 +802,46 @@ func dbToFloat64(t interface{}) (float64, bool) {
}
}
// Convert database/sql types to uint64 for Prometheus consumption. Null types are mapped to 0.
// string and []byte types are parsed with strconv; values that fail to parse map to 0 and !ok.
func dbToUint64(t interface{}) (uint64, bool) {
switch v := t.(type) {
case uint64:
return v, true
case int64:
return uint64(v), true
case float64:
return uint64(v), true
case time.Time:
return uint64(v.Unix()), true
case []byte:
// Try and convert to string and then parse to a uint64
strV := string(v)
result, err := strconv.ParseUint(strV, 10, 64)
if err != nil {
log.Infoln("Could not parse []byte:", err)
return 0, false
}
return result, true
case string:
result, err := strconv.ParseUint(v, 10, 64)
if err != nil {
log.Infoln("Could not parse string:", err)
return 0, false
}
return result, true
case bool:
if v {
return 1, true
}
return 0, true
case nil:
return 0, true
default:
return 0, false
}
}
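As a quick illustration of those conversion rules, a sketch of a table-driven test (hypothetical; assumes it sits in the same package as `dbToUint64`):

```go
package main

import "testing"

// Hypothetical spot-check of the documented dbToUint64 conversions.
func TestDbToUint64Sketch(t *testing.T) {
	cases := []struct {
		in   interface{}
		want uint64
		ok   bool
	}{
		{int64(42), 42, true},      // integers pass through
		{"123", 123, true},         // strings are parsed
		{[]byte("7"), 7, true},     // []byte is parsed via string
		{true, 1, true},            // booleans map to 0/1
		{nil, 0, true},             // NULL maps to 0
		{"not-a-number", 0, false}, // parse failures report !ok
	}
	for _, c := range cases {
		if got, ok := dbToUint64(c.in); got != c.want || ok != c.ok {
			t.Errorf("dbToUint64(%v) = %d, %v; want %d, %v", c.in, got, ok, c.want, c.ok)
		}
	}
}
```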
// Convert database/sql types to string for Prometheus labels. Null types are mapped to empty strings.
func dbToString(t interface{}) (string, bool) {
switch v := t.(type) {
@@ -977,7 +1047,7 @@ func (s *Servers) GetServer(dsn string) (*Server, error) {
var err error
var ok bool
errCount := 0 // start at zero because we increment before doing work
retries := 1
var server *Server
for {
if errCount++; errCount > retries {
@@ -1167,29 +1237,6 @@ func (e *Exporter) setupInternalMetrics() {
// Describe implements prometheus.Collector.
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
}
// Collect implements prometheus.Collector.
@@ -1304,13 +1351,68 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa
continue
}
if metricMapping.histogram {
var keys []float64
err = pq.Array(&keys).Scan(columnData[idx])
if err != nil {
return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "buckets:", namespace, err))
}
var values []int64
valuesIdx, ok := columnIdx[columnName+"_bucket"]
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_bucket")))
continue
}
err = pq.Array(&values).Scan(columnData[valuesIdx])
if err != nil {
return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "bucket values:", namespace, err))
}
buckets := make(map[float64]uint64, len(keys))
for i, key := range keys {
if i >= len(values) {
break
}
buckets[key] = uint64(values[i])
}
idx, ok = columnIdx[columnName+"_sum"]
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_sum")))
continue
}
sum, ok := dbToFloat64(columnData[idx])
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_sum", columnData[idx])))
continue
}
idx, ok = columnIdx[columnName+"_count"]
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_count")))
continue
}
count, ok := dbToUint64(columnData[idx])
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_count", columnData[idx])))
continue
}
metric = prometheus.MustNewConstHistogram(
metricMapping.desc,
count, sum, buckets,
labels...,
)
} else {
value, ok := dbToFloat64(columnData[idx])
if !ok {
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx])))
continue
}
// Generate the metric
metric = prometheus.MustNewConstMetric(metricMapping.desc, metricMapping.vtype, value, labels...)
}
} else {
// Unknown metric. Report as untyped if scan to float64 works, else note an error too.
metricLabel := fmt.Sprintf("%s_%s", namespace, columnName)
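The two building blocks used in the histogram branch above, scanning Postgres arrays with `pq.Array` and emitting a constant histogram with `prometheus.MustNewConstHistogram`, can be exercised in isolation. A self-contained sketch with invented values:

```go
package main

import (
	"fmt"

	"github.com/lib/pq"
	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Postgres array columns arrive from database/sql as literals like
	// "{1,2,4,8}"; pq.Array scans them into Go slices.
	var keys []float64
	if err := pq.Array(&keys).Scan([]byte("{1,2,4,8}")); err != nil {
		panic(err)
	}
	var values []int64
	if err := pq.Array(&values).Scan([]byte("{10,25,60,100}")); err != nil {
		panic(err)
	}

	// Pair each upper bound with its cumulative count, as above.
	buckets := make(map[float64]uint64, len(keys))
	for i, key := range keys {
		if i >= len(values) {
			break
		}
		buckets[key] = uint64(values[i])
	}

	desc := prometheus.NewDesc("pg_example_histogram", "Sketch only.", nil, nil)
	// count and sum are invented here; in the exporter they come from the
	// _count and _sum columns.
	m := prometheus.MustNewConstHistogram(desc, 100, 345.0, buckets)
	fmt.Println(m.Desc())
}
```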
@@ -1515,20 +1617,36 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
}
func (e *Exporter) discoverDatabaseDSNs() []string {
// connstring syntax is complex (and not sure if even regular).
// we don't need to parse it, so just superficially validate that it starts
// with a valid-ish keyword pair
connstringRe := regexp.MustCompile(`^ *[a-zA-Z0-9]+ *= *[^= ]+`)
dsns := make(map[string]struct{})
for _, dsn := range e.dsn {
var dsnURI *url.URL
var dsnConnstring string
if strings.HasPrefix(dsn, "postgresql://") {
var err error
dsnURI, err = url.Parse(dsn)
if err != nil {
log.Errorf("Unable to parse DSN as URI (%s): %v", loggableDSN(dsn), err)
continue
}
} else if connstringRe.MatchString(dsn) {
dsnConnstring = dsn
} else {
log.Errorf("Unable to parse DSN as either URI or connstring (%s)", loggableDSN(dsn))
continue
}
server, err := e.servers.GetServer(dsn)
if err != nil {
log.Errorf("Error opening connection to database (%s): %v", loggableDSN(dsn), err)
continue
}
dsns[dsn] = struct{}{}
// If autoDiscoverDatabases is true, set first dsn as master database (Default: false)
server.master = true
@@ -1542,8 +1660,16 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
if contains(e.excludeDatabases, databaseName) {
continue
}
if dsnURI != nil {
dsnURI.Path = databaseName
dsn = dsnURI.String()
} else {
// replacing one dbname with another is complicated.
// just append new dbname to override.
dsn = fmt.Sprintf("%s dbname=%s", dsnConnstring, databaseName)
}
dsns[dsn] = struct{}{}
}
}
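Condensed into standalone form, the DSN branching above looks roughly like this; the `withDatabase` helper is hypothetical, invented for illustration:

```go
package main

import (
	"fmt"
	"net/url"
	"regexp"
	"strings"
)

// Same superficial check as above: the connstring must start with a
// keyword=value pair.
var connstringRe = regexp.MustCompile(`^ *[a-zA-Z0-9]+ *= *[^= ]+`)

// withDatabase (hypothetical) retargets a DSN at another database: URIs get
// their path replaced, keyword/value connstrings get a dbname appended,
// which overrides any earlier dbname.
func withDatabase(dsn, db string) (string, error) {
	switch {
	case strings.HasPrefix(dsn, "postgresql://"):
		u, err := url.Parse(dsn)
		if err != nil {
			return "", err
		}
		u.Path = db
		return u.String(), nil
	case connstringRe.MatchString(dsn):
		return fmt.Sprintf("%s dbname=%s", dsn, db), nil
	default:
		return "", fmt.Errorf("unable to parse DSN as either URI or connstring")
	}
}

func main() {
	fmt.Println(withDatabase("postgresql://user@localhost:5432/postgres", "app"))
	fmt.Println(withDatabase("host=localhost user=postgres", "app"))
}
```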


@@ -126,3 +126,26 @@ func (s *IntegrationSuite) TestUnknownMetricParsingDoesntCrash(c *C) {
// scrape the exporter and make sure it works
exporter.scrape(ch)
}
// TestExtendQueriesDoesntCrash tests that specifying extend.query-path doesn't
// crash.
func (s *IntegrationSuite) TestExtendQueriesDoesntCrash(c *C) {
// Setup a dummy channel to consume metrics
ch := make(chan prometheus.Metric, 100)
go func() {
for range ch {
}
}()
dsn := os.Getenv("DATA_SOURCE_NAME")
c.Assert(dsn, Not(Equals), "")
exporter := NewExporter(
strings.Split(dsn, ","),
WithUserQueriesPath("../user_queries_test.yaml"),
)
c.Assert(exporter, NotNil)
// scrape the exporter and make sure it works
exporter.scrape(ch)
}


@@ -4,9 +4,11 @@ package main
import (
"io/ioutil"
"math"
"os"
"reflect"
"testing"
"time"
"github.com/blang/semver"
"github.com/prometheus/client_golang/prometheus"
@@ -287,6 +289,22 @@ func UnsetEnvironment(c *C, d string) {
c.Assert(err, IsNil)
}
type isNaNChecker struct {
*CheckerInfo
}
var IsNaN Checker = &isNaNChecker{
&CheckerInfo{Name: "IsNaN", Params: []string{"value"}},
}
func (checker *isNaNChecker) Check(params []interface{}, names []string) (result bool, error string) {
param, ok := (params[0]).(float64)
if !ok {
return false, "obtained value type is not a float"
}
return math.IsNaN(param), ""
}
// test boolean metric type gets converted to float
func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) {
@@ -294,6 +312,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) {
input interface{}
expectedString string
expectedValue float64
expectedCount uint64
expectedOK bool
}
@@ -302,19 +321,71 @@
input: true,
expectedString: "true",
expectedValue: 1.0,
expectedCount: 1,
expectedOK: true,
},
{
input: false,
expectedString: "false",
expectedValue: 0.0,
expectedCount: 0,
expectedOK: true,
},
{
input: nil,
expectedString: "",
expectedValue: math.NaN(),
expectedCount: 0,
expectedOK: true,
},
{
input: TestCase{},
expectedString: "",
expectedValue: math.NaN(),
expectedCount: 0,
expectedOK: false,
},
{
input: 123.0,
expectedString: "123",
expectedValue: 123.0,
expectedCount: 123,
expectedOK: true,
},
{
input: "123",
expectedString: "123",
expectedValue: 123.0,
expectedCount: 123,
expectedOK: true,
},
{
input: []byte("123"),
expectedString: "123",
expectedValue: 123.0,
expectedCount: 123,
expectedOK: true,
},
{
input: time.Unix(1600000000, 0),
expectedString: "1600000000",
expectedValue: 1600000000.0,
expectedCount: 1600000000,
expectedOK: true,
},
}
for _, cs := range cases {
value, ok := dbToFloat64(cs.input)
if math.IsNaN(cs.expectedValue) {
c.Assert(value, IsNaN)
} else {
c.Assert(value, Equals, cs.expectedValue)
}
c.Assert(ok, Equals, cs.expectedOK)
count, ok := dbToUint64(cs.input)
c.Assert(count, Equals, cs.expectedCount)
c.Assert(ok, Equals, cs.expectedOK)
str, ok := dbToString(cs.input)


@@ -0,0 +1,51 @@
random:
query: |
WITH data AS (SELECT floor(random()*10) AS d FROM generate_series(1,100)),
metrics AS (SELECT SUM(d) AS sum, COUNT(*) AS count FROM data),
buckets AS (SELECT le, SUM(CASE WHEN d <= le THEN 1 ELSE 0 END) AS d
FROM data, UNNEST(ARRAY[1, 2, 4, 8]) AS le GROUP BY le)
SELECT
sum AS histogram_sum,
count AS histogram_count,
ARRAY_AGG(le) AS histogram,
ARRAY_AGG(d) AS histogram_bucket,
ARRAY_AGG(le) AS missing,
ARRAY_AGG(le) AS missing_sum,
ARRAY_AGG(d) AS missing_sum_bucket,
ARRAY_AGG(le) AS missing_count,
ARRAY_AGG(d) AS missing_count_bucket,
sum AS missing_count_sum,
ARRAY_AGG(le) AS unexpected_sum,
ARRAY_AGG(d) AS unexpected_sum_bucket,
'data' AS unexpected_sum_sum,
ARRAY_AGG(le) AS unexpected_count,
ARRAY_AGG(d) AS unexpected_count_bucket,
sum AS unexpected_count_sum,
'nan'::varchar AS unexpected_count_count,
ARRAY_AGG(le) AS unexpected_bytes,
ARRAY_AGG(d) AS unexpected_bytes_bucket,
sum AS unexpected_bytes_sum,
'nan'::bytea AS unexpected_bytes_count
FROM metrics, buckets GROUP BY 1,2
metrics:
- histogram:
usage: "HISTOGRAM"
description: "Random data"
- missing:
usage: "HISTOGRAM"
description: "nonfatal error"
- missing_sum:
usage: "HISTOGRAM"
description: "nonfatal error"
- missing_count:
usage: "HISTOGRAM"
description: "nonfatal error"
- unexpected_sum:
usage: "HISTOGRAM"
description: "nonfatal error"
- unexpected_count:
usage: "HISTOGRAM"
description: "nonfatal error"
- unexpected_bytes:
usage: "HISTOGRAM"
description: "nonfatal error"


@@ -1,5 +1,5 @@
pg_replication:
query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) as lag"
query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
master: true
metrics:
- lag:
@@ -15,7 +15,32 @@ pg_postmaster:
description: "Time at which postmaster started"
pg_stat_user_tables:
query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z'), COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
query: |
SELECT
current_database() datname,
schemaname,
relname,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_tup_hot_upd,
n_live_tup,
n_dead_tup,
n_mod_since_analyze,
COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
vacuum_count,
autovacuum_count,
analyze_count,
autoanalyze_count
FROM
pg_stat_user_tables
metrics:
- datname:
usage: "LABEL"
@@ -203,3 +228,47 @@ pg_stat_statements:
- blk_write_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
pg_stat_activity:
query: |
WITH
metrics AS (
SELECT
application_name,
SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
COUNT(*) AS process_idle_seconds_count
FROM pg_stat_activity
WHERE state = 'idle'
GROUP BY application_name
),
buckets AS (
SELECT
application_name,
le,
SUM(
CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
THEN 1
ELSE 0
END
)::bigint AS bucket
FROM
pg_stat_activity,
UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
GROUP BY application_name, le
ORDER BY application_name, le
)
SELECT
application_name,
process_idle_seconds_sum,
process_idle_seconds_count,
ARRAY_AGG(le) AS process_idle_seconds,
ARRAY_AGG(bucket) AS process_idle_seconds_bucket
FROM metrics JOIN buckets USING (application_name)
GROUP BY 1, 2, 3
metrics:
- application_name:
usage: "LABEL"
description: "Application Name"
- process_idle_seconds:
usage: "HISTOGRAM"
description: "Idle time of server processes"