fix set initial failed peers (#1407)
* Correctly add Node to initially failed peer Reconnect attempts to failed peers were panicking because peer.Address() would attempt to access the nil Node struct member. Signed-off-by: stuart nelson <stuartnelson3@gmail.com> * Correctly remove old peers Again, since we aren't assigning a name (this is generated) we rely on the node's Address for removing the initially joining (and potentially later re-joining) peers Signed-off-by: stuart nelson <stuartnelson3@gmail.com> * Test the peerJoin removes initial peers Signed-off-by: stuart nelson <stuartnelson3@gmail.com> * Don't add self to failing peers list The initially failing peers list shouldn't include the bindAddr for the alertmanager itself, as this connection is never made, and consequently only removed from the failedPeers list after the failed peer timeout. Signed-off-by: stuart nelson <stuartnelson3@gmail.com> * Filter initialFailed with advertise addr This may differ from bindAddr, and is the value we want to not attempt to connect to. Signed-off-by: stuart nelson <stuartnelson3@gmail.com>
This commit is contained in:
parent
36588c3865
commit
6305229fcc
|
@ -205,7 +205,7 @@ func Join(
|
|||
}
|
||||
p.mlist = ml
|
||||
|
||||
p.setInitialFailed(resolvedPeers)
|
||||
p.setInitialFailed(resolvedPeers, fmt.Sprintf("%s:%d", advertiseHost, advertisePort))
|
||||
|
||||
n, err := ml.Join(resolvedPeers)
|
||||
if err != nil {
|
||||
|
@ -226,16 +226,34 @@ func Join(
|
|||
|
||||
// All peers are initially added to the failed list. They will be removed from
|
||||
// this list in peerJoin when making their initial connection.
|
||||
func (p *Peer) setInitialFailed(peers []string) {
|
||||
func (p *Peer) setInitialFailed(peers []string, myAddr string) {
|
||||
if len(peers) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
for _, peerAddr := range peers {
|
||||
if peerAddr == myAddr {
|
||||
// Don't add ourselves to the initially failing list,
|
||||
// we don't connect to ourselves.
|
||||
continue
|
||||
}
|
||||
ip, port, err := net.SplitHostPort(peerAddr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
portUint, err := strconv.ParseUint(port, 10, 16)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
pr := peer{
|
||||
status: StatusNone,
|
||||
status: StatusFailed,
|
||||
leaveTime: now,
|
||||
Node: &memberlist.Node{
|
||||
Addr: net.ParseIP(ip),
|
||||
Port: uint16(portUint),
|
||||
},
|
||||
}
|
||||
p.failedPeers = append(p.failedPeers, pr)
|
||||
p.peers[peerAddr] = pr
|
||||
|
@ -378,7 +396,7 @@ func (p *Peer) peerJoin(n *memberlist.Node) {
|
|||
|
||||
if oldStatus == StatusFailed {
|
||||
level.Debug(p.logger).Log("msg", "peer rejoined", "peer", pr.Node)
|
||||
p.failedPeers = removeOldPeer(p.failedPeers, pr.Name)
|
||||
p.failedPeers = removeOldPeer(p.failedPeers, pr.Address())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -700,10 +718,10 @@ func retry(interval time.Duration, stopc <-chan struct{}, f func() error) error
|
|||
}
|
||||
}
|
||||
|
||||
func removeOldPeer(old []peer, name string) []peer {
|
||||
func removeOldPeer(old []peer, addr string) []peer {
|
||||
new := make([]peer, 0, len(old))
|
||||
for _, p := range old {
|
||||
if p.Name != name {
|
||||
if p.Address() != addr {
|
||||
new = append(new, p)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -178,7 +178,8 @@ func TestRemoveFailedPeers(t *testing.T) {
|
|||
|
||||
func TestInitiallyFailingPeers(t *testing.T) {
|
||||
logger := log.NewNopLogger()
|
||||
peerAddrs := []string{"1.2.3.4:5000", "2.3.4.5:5000", "3.4.5.6:5000"}
|
||||
myAddr := "1.2.3.4:5000"
|
||||
peerAddrs := []string{myAddr, "2.3.4.5:5000", "3.4.5.6:5000"}
|
||||
p, err := Join(
|
||||
logger,
|
||||
prometheus.NewRegistry(),
|
||||
|
@ -197,12 +198,21 @@ func TestInitiallyFailingPeers(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
require.NotNil(t, p)
|
||||
|
||||
p.setInitialFailed(peerAddrs)
|
||||
p.setInitialFailed(peerAddrs, myAddr)
|
||||
|
||||
require.Equal(t, len(peerAddrs), len(p.failedPeers))
|
||||
// We shouldn't have added "our" bind addr to the failed peers list
|
||||
require.Equal(t, len(peerAddrs)-1, len(p.failedPeers))
|
||||
for _, addr := range peerAddrs {
|
||||
if addr == myAddr {
|
||||
continue
|
||||
}
|
||||
|
||||
pr, ok := p.peers[addr]
|
||||
require.True(t, ok)
|
||||
require.Equal(t, StatusNone, pr.status)
|
||||
require.Equal(t, StatusFailed.String(), pr.status.String())
|
||||
require.Equal(t, addr, pr.Address())
|
||||
expectedLen := len(p.failedPeers) - 1
|
||||
p.peerJoin(pr.Node)
|
||||
require.Equal(t, expectedLen, len(p.failedPeers))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue