From 12c12cf368cd9dabc305336bbec11f2f9d61ca93 Mon Sep 17 00:00:00 2001 From: Felix Yuan <felix.yuan@reddit.com> Date: Wed, 19 Jul 2023 14:24:08 -0700 Subject: [PATCH] Add a logger to stat_database collector to get better handle on error (also clean up some metric validity checks) Signed-off-by: Felix Yuan <felix.yuan@reddit.com> --- collector/pg_stat_database.go | 246 ++++++++++++++--------------- collector/pg_stat_database_test.go | 165 ++++++++++++------- 2 files changed, 225 insertions(+), 186 deletions(-) diff --git a/collector/pg_stat_database.go b/collector/pg_stat_database.go index 8a882f89..382ff782 100644 --- a/collector/pg_stat_database.go +++ b/collector/pg_stat_database.go @@ -17,6 +17,8 @@ import ( "context" "database/sql" + "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -26,10 +28,12 @@ func init() { registerCollector(statDatabaseSubsystem, defaultEnabled, NewPGStatDatabaseCollector) } -type PGStatDatabaseCollector struct{} +type PGStatDatabaseCollector struct { + log log.Logger +} func NewPGStatDatabaseCollector(config collectorConfig) (Collector, error) { - return &PGStatDatabaseCollector{}, nil + return &PGStatDatabaseCollector{log: config.logger}, nil } var ( @@ -228,7 +232,7 @@ var ( ` ) -func (PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { +func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() rows, err := db.QueryContext(ctx, statDatabaseQuery, @@ -267,217 +271,203 @@ func (PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, c if err != nil { return err } - datidLabel := "unknown" - if datid.Valid { - datidLabel = datid.String + + if !datid.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no datid") + continue } - datnameLabel := "unknown" - if datname.Valid { - datnameLabel = datname.String + if !datname.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no datname") + continue + } + if !numBackends.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no numbackends") + continue + } + if !xactCommit.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no xact_commit") + continue + } + if !xactRollback.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no xact_rollback") + continue + } + if !blksRead.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blks_read") + continue + } + if !blksHit.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blks_hit") + continue + } + if !tupReturned.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_returned") + continue + } + if !tupFetched.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_fetched") + continue + } + if !tupInserted.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_inserted") + continue + } + if !tupUpdated.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_updated") + continue + } + if !tupDeleted.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_deleted") + continue + } + if !conflicts.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no conflicts") + continue + } + if !tempFiles.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no temp_files") + continue + } + if !tempBytes.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no temp_bytes") + continue + } + if !deadlocks.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no deadlocks") + continue + } + if !blkReadTime.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blk_read_time") + continue + } + if !blkWriteTime.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blk_write_time") + continue + } + if !statsReset.Valid { + level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no stats_reset") + continue } - numBackendsMetric := 0.0 - if numBackends.Valid { - numBackendsMetric = numBackends.Float64 - } + labels := []string{datid.String, datname.String} + ch <- prometheus.MustNewConstMetric( statDatabaseNumbackends, prometheus.GaugeValue, - numBackendsMetric, - datidLabel, - datnameLabel, + numBackends.Float64, + labels..., ) - xactCommitMetric := 0.0 - if xactCommit.Valid { - xactCommitMetric = xactCommit.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseXactCommit, prometheus.CounterValue, - xactCommitMetric, - datidLabel, - datnameLabel, + xactCommit.Float64, + labels..., ) - xactRollbackMetric := 0.0 - if xactRollback.Valid { - xactRollbackMetric = xactRollback.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseXactRollback, prometheus.CounterValue, - xactRollbackMetric, - datidLabel, - datnameLabel, + xactRollback.Float64, + labels..., ) - blksReadMetric := 0.0 - if blksRead.Valid { - blksReadMetric = blksRead.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseBlksRead, prometheus.CounterValue, - blksReadMetric, - datidLabel, - datnameLabel, + blksRead.Float64, + labels..., ) - blksHitMetric := 0.0 - if blksHit.Valid { - blksHitMetric = blksHit.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseBlksHit, prometheus.CounterValue, - blksHitMetric, - datidLabel, - datnameLabel, + blksHit.Float64, + labels..., ) - tupReturnedMetric := 0.0 - if tupReturned.Valid { - tupReturnedMetric = tupReturned.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTupReturned, prometheus.CounterValue, - tupReturnedMetric, - datidLabel, - datnameLabel, + tupReturned.Float64, + labels..., ) - tupFetchedMetric := 0.0 - if tupFetched.Valid { - tupFetchedMetric = tupFetched.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTupFetched, prometheus.CounterValue, - tupFetchedMetric, - datidLabel, - datnameLabel, + tupFetched.Float64, + labels..., ) - tupInsertedMetric := 0.0 - if tupInserted.Valid { - tupInsertedMetric = tupInserted.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTupInserted, prometheus.CounterValue, - tupInsertedMetric, - datidLabel, - datnameLabel, + tupInserted.Float64, + labels..., ) - tupUpdatedMetric := 0.0 - if tupUpdated.Valid { - tupUpdatedMetric = tupUpdated.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTupUpdated, prometheus.CounterValue, - tupUpdatedMetric, - datidLabel, - datnameLabel, + tupUpdated.Float64, + labels..., ) - tupDeletedMetric := 0.0 - if tupDeleted.Valid { - tupDeletedMetric = tupDeleted.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTupDeleted, prometheus.CounterValue, - tupDeletedMetric, - datidLabel, - datnameLabel, + tupDeleted.Float64, + labels..., ) - conflictsMetric := 0.0 - if conflicts.Valid { - conflictsMetric = conflicts.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseConflicts, prometheus.CounterValue, - conflictsMetric, - datidLabel, - datnameLabel, + conflicts.Float64, + labels..., ) - tempFilesMetric := 0.0 - if tempFiles.Valid { - tempFilesMetric = tempFiles.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTempFiles, prometheus.CounterValue, - tempFilesMetric, - datidLabel, - datnameLabel, + tempFiles.Float64, + labels..., ) - tempBytesMetric := 0.0 - if tempBytes.Valid { - tempBytesMetric = tempBytes.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseTempBytes, prometheus.CounterValue, - tempBytesMetric, - datidLabel, - datnameLabel, + tempBytes.Float64, + labels..., ) - deadlocksMetric := 0.0 - if deadlocks.Valid { - deadlocksMetric = deadlocks.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseDeadlocks, prometheus.CounterValue, - deadlocksMetric, - datidLabel, - datnameLabel, + deadlocks.Float64, + labels..., ) - blkReadTimeMetric := 0.0 - if blkReadTime.Valid { - blkReadTimeMetric = blkReadTime.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseBlkReadTime, prometheus.CounterValue, - blkReadTimeMetric, - datidLabel, - datnameLabel, + blkReadTime.Float64, + labels..., ) - blkWriteTimeMetric := 0.0 - if blkWriteTime.Valid { - blkWriteTimeMetric = blkWriteTime.Float64 - } ch <- prometheus.MustNewConstMetric( statDatabaseBlkWriteTime, prometheus.CounterValue, - blkWriteTimeMetric, - datidLabel, - datnameLabel, + blkWriteTime.Float64, + labels..., ) - statsResetMetric := 0.0 - if statsReset.Valid { - statsResetMetric = float64(statsReset.Time.Unix()) - } ch <- prometheus.MustNewConstMetric( statDatabaseStatsReset, prometheus.CounterValue, - statsResetMetric, - datidLabel, - datnameLabel, + float64(statsReset.Time.Unix()), + labels..., ) } return nil diff --git a/collector/pg_stat_database_test.go b/collector/pg_stat_database_test.go index 1fe92eed..70c73eb5 100644 --- a/collector/pg_stat_database_test.go +++ b/collector/pg_stat_database_test.go @@ -18,6 +18,7 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" + "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/smartystreets/goconvey/convey" @@ -86,7 +87,9 @@ func TestPGStatDatabaseCollector(t *testing.T) { ch := make(chan prometheus.Metric) go func() { defer close(ch) - c := PGStatDatabaseCollector{} + c := PGStatDatabaseCollector{ + log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + } if err := c.Update(context.Background(), inst, ch); err != nil { t.Errorf("Error calling PGStatDatabaseCollector.Update: %s", err) @@ -131,6 +134,10 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { } defer db.Close() + srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") + if err != nil { + t.Fatalf("Error parsing time: %s", err) + } inst := &instance{db: db} columns := []string{ @@ -158,31 +165,52 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { rows := sqlmock.NewRows(columns). AddRow( nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - ) + "postgres", + 354, + 4945, + 289097744, + 1242257, + int64(3275602074), + 89320867, + 450139, + 2034563757, + 0, + int64(2725688749), + 23, + 52, + 74, + 925, + 16, + 823, + srT). + AddRow( + "pid", + "postgres", + 354, + 4945, + 289097744, + 1242257, + int64(3275602074), + 89320867, + 450139, + 2034563757, + 0, + int64(2725688749), + 23, + 52, + 74, + 925, + 16, + 823, + srT) mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) - c := PGStatDatabaseCollector{} + c := PGStatDatabaseCollector{ + log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + } if err := c.Update(context.Background(), inst, ch); err != nil { t.Errorf("Error calling PGStatDatabaseCollector.Update: %s", err) @@ -190,23 +218,23 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { }() expected := []MetricResult{ - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_GAUGE, value: 354}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 4945}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 289097744}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1242257}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3275602074}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 89320867}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 450139}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2034563757}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2725688749}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 23}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 52}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 74}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 925}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, } convey.Convey("Metrics comparison", t, func() { @@ -296,14 +324,35 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { nil, nil, nil, - ) - + ). + AddRow( + "pid", + "postgres", + 355, + 4946, + 289097745, + 1242258, + int64(3275602075), + 89320868, + 450140, + 2034563758, + 1, + int64(2725688750), + 24, + 53, + 75, + 926, + 17, + 824, + srT) mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) - c := PGStatDatabaseCollector{} + c := PGStatDatabaseCollector{ + log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + } if err := c.Update(context.Background(), inst, ch); err != nil { t.Errorf("Error calling PGStatDatabaseCollector.Update: %s", err) @@ -328,23 +377,23 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, - {labels: labelMap{"datid": "unknown", "datname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_GAUGE, value: 355}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 4946}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 289097745}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1242258}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 3275602075}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 89320868}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 450140}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2034563758}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 2725688750}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 24}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 53}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 75}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 926}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 17}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 824}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, } convey.Convey("Metrics comparison", t, func() {