mirror of
https://github.com/prometheus-community/postgres_exporter
synced 2025-02-07 14:01:33 +00:00
Add retries to getServer() (#316)
Some backstory ============== I was attempting to use postgres_exporter with the official Docker container (https://hub.docker.com/_/postgres) in a Kubernetes StatefulSet with a sidecar configuration, but found that I wasn't able to connect, even when sharing the Postgres Unix listening socket between both containers. After copying the container over to an Alpine base, I quickly found out that postgres_exporter was actually starting before the main Postgres container had dropped the Unix socket onto the file system. A quick workaround is to write a bash for loop that checks for the existence of the Unix socket; however, this would require maintaining a container, and other users may find retries on startup useful. Implementation ============== All changes are made to the getServer function, and the variables are local. I was unsure whether it was worth adding command-line switches, but this would allow for a more sophisticated backoff loop in the future. Hope this helps, and let me know if you would like me to change anything.
This commit is contained in:
parent
238f5c099a
commit
043e68e067
@ -863,17 +863,29 @@ func (s *Servers) GetServer(dsn string) (*Server, error) {
|
||||
s.m.Lock()
|
||||
defer s.m.Unlock()
|
||||
var err error
|
||||
server, ok := s.servers[dsn]
|
||||
if !ok {
|
||||
server, err = NewServer(dsn, s.opts...)
|
||||
if err != nil {
|
||||
var ok bool
|
||||
errCount := 0 // start at zero because we increment before doing work
|
||||
retries := 3
|
||||
var server *Server
|
||||
for {
|
||||
if errCount++; errCount > retries {
|
||||
return nil, err
|
||||
}
|
||||
s.servers[dsn] = server
|
||||
}
|
||||
if err = server.Ping(); err != nil {
|
||||
delete(s.servers, dsn)
|
||||
return nil, err
|
||||
server, ok = s.servers[dsn]
|
||||
if !ok {
|
||||
server, err = NewServer(dsn, s.opts...)
|
||||
if err != nil {
|
||||
time.Sleep(time.Duration(errCount) * time.Second)
|
||||
continue
|
||||
}
|
||||
s.servers[dsn] = server
|
||||
}
|
||||
if err = server.Ping(); err != nil {
|
||||
delete(s.servers, dsn)
|
||||
time.Sleep(time.Duration(errCount) * time.Second)
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
return server, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user