Fix pg_up metric returns last calculated value without explicit resetting (#291)

If exporter is scraped by multiple Prometheuses (as we do) - Collect() could be called concurrently. In result in some cases one of Prometheuses could get pg_up = 0, because it was explicitly set to zero on first Collect call.
This commit is contained in:
Dmitry Ulyanov 2019-08-12 06:35:58 +03:00 committed by Will Rouesnel
parent 1ab8d2bbc8
commit 238f5c099a

View File

@ -129,6 +129,16 @@ type MetricMap struct {
conversion func(interface{}) (float64, bool) // Conversion function to turn PG result into float64
}
// ErrorConnectToServer is a connection to PgSQL server error
type ErrorConnectToServer struct {
Msg string
}
// Error returns error
func (e *ErrorConnectToServer) Error() string {
return e.Msg
}
// TODO: revisit this with the semver system
func dumpMaps() {
// TODO: make this function part of the exporter
@ -813,21 +823,24 @@ func (s *Server) String() string {
}
// Scrape loads metrics.
func (s *Server) Scrape(ch chan<- prometheus.Metric, errGauge prometheus.Gauge, disableSettingsMetrics bool) {
func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool) error {
s.mappingMtx.RLock()
defer s.mappingMtx.RUnlock()
var err error
if !disableSettingsMetrics {
if err := querySettings(ch, s); err != nil {
log.Errorf("Error retrieving settings: %s", err)
errGauge.Inc()
if err = querySettings(ch, s); err != nil {
err = fmt.Errorf("error retrieving settings: %s", err)
}
}
errMap := queryNamespaceMappings(ch, s)
if len(errMap) > 0 {
errGauge.Inc()
err = fmt.Errorf("queryNamespaceMappings returned %d errors", len(errMap))
}
return err
}
// Servers contains a collection of servers to Postgres.
@ -1289,16 +1302,40 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) {
e.duration.Set(time.Since(begun).Seconds())
}(time.Now())
e.error.Set(0)
e.psqlUp.Set(0)
e.totalScrapes.Inc()
dsns := e.dsn
if e.autoDiscoverDatabases {
dsns = e.discoverDatabaseDSNs()
}
var errorsCount int
var connectionErrorsCount int
for _, dsn := range dsns {
e.scrapeDSN(ch, dsn)
if err := e.scrapeDSN(ch, dsn); err != nil {
errorsCount++
log.Errorf(err.Error())
if _, ok := err.(*ErrorConnectToServer); ok {
connectionErrorsCount++
}
}
}
switch {
case connectionErrorsCount >= len(dsns):
e.psqlUp.Set(0)
default:
e.psqlUp.Set(1) // Didn't fail, can mark connection as up for this scrape.
}
switch errorsCount {
case 0:
e.error.Set(0)
default:
e.error.Set(1)
}
}
@ -1342,24 +1379,18 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
return result
}
func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) {
func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error {
server, err := e.servers.GetServer(dsn)
if err != nil {
log.Errorf("Error opening connection to database (%s): %v", loggableDSN(dsn), err)
e.error.Inc()
return
return &ErrorConnectToServer{fmt.Sprintf("Error opening connection to database (%s): %s", loggableDSN(dsn), err)}
}
// Didn't fail, can mark connection as up for this scrape.
e.psqlUp.Inc()
// Check if map versions need to be updated
if err := e.checkMapVersions(ch, server); err != nil {
log.Warnln("Proceeding with outdated query maps, as the Postgres version could not be determined:", err)
e.error.Inc()
}
server.Scrape(ch, e.error, e.disableSettingsMetrics)
return server.Scrape(ch, e.disableSettingsMetrics)
}
// try to get the DataSource