mirror of
https://github.com/prometheus-community/postgres_exporter
synced 2025-04-26 13:08:01 +00:00
Add retries to getServer() (#316)
Some backstory ============== I was attempting to use postgres_exporter with the official Docker container (https://hub.docker.com/_/postgres) in a Kubernetes StatefulSet, with a side-car configuration, but found that I wasn't able to connect, even when sharing the Postgres Unix listening socket between both containers. After copying the container over to an Alpine base, I quickly found out that postgres_exporter was actually starting before the main Postgres container had dropped the Unix socket onto the file system. A quick workaround is to write a bash for loop checking for the existence of the Unix socket; however, this would require maintaining a container, and besides, other users may find retries useful on startup. Implementation ============== All changes are made to the getServer function, and the variables are local. I was unsure if it was worth adding command-line switches, but doing so would allow for a more sophisticated back-off loop in the future. Hope this helps, and let me know if you would like me to change anything.
This commit is contained in:
parent
238f5c099a
commit
043e68e067
@ -863,17 +863,29 @@ func (s *Servers) GetServer(dsn string) (*Server, error) {
|
|||||||
s.m.Lock()
|
s.m.Lock()
|
||||||
defer s.m.Unlock()
|
defer s.m.Unlock()
|
||||||
var err error
|
var err error
|
||||||
server, ok := s.servers[dsn]
|
var ok bool
|
||||||
if !ok {
|
errCount := 0 // start at zero because we increment before doing work
|
||||||
server, err = NewServer(dsn, s.opts...)
|
retries := 3
|
||||||
if err != nil {
|
var server *Server
|
||||||
|
for {
|
||||||
|
if errCount++; errCount > retries {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
s.servers[dsn] = server
|
server, ok = s.servers[dsn]
|
||||||
}
|
if !ok {
|
||||||
if err = server.Ping(); err != nil {
|
server, err = NewServer(dsn, s.opts...)
|
||||||
delete(s.servers, dsn)
|
if err != nil {
|
||||||
return nil, err
|
time.Sleep(time.Duration(errCount) * time.Second)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.servers[dsn] = server
|
||||||
|
}
|
||||||
|
if err = server.Ping(); err != nil {
|
||||||
|
delete(s.servers, dsn)
|
||||||
|
time.Sleep(time.Duration(errCount) * time.Second)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
return server, nil
|
return server, nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user