2018-03-02 14:45:21 +00:00
|
|
|
// Copyright 2018 Prometheus Team
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package cluster
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"testing"
|
2018-04-10 08:30:12 +00:00
|
|
|
"time"
|
2018-03-02 14:45:21 +00:00
|
|
|
|
2018-08-22 15:40:07 +00:00
|
|
|
"github.com/hashicorp/go-sockaddr"
|
2018-03-02 14:45:21 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2024-11-06 09:09:57 +00:00
|
|
|
"github.com/prometheus/common/promslog"
|
2018-03-02 14:45:21 +00:00
|
|
|
)
|
|
|
|
|
2018-08-22 15:40:07 +00:00
|
|
|
func TestClusterJoinAndReconnect(t *testing.T) {
|
|
|
|
ip, _ := sockaddr.GetPrivateIP()
|
|
|
|
if ip == "" {
|
|
|
|
t.Skipf("skipping tests because no private IP address can be found")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
t.Run("TestJoinLeave", testJoinLeave)
|
|
|
|
t.Run("TestReconnect", testReconnect)
|
|
|
|
t.Run("TestRemoveFailedPeers", testRemoveFailedPeers)
|
|
|
|
t.Run("TestInitiallyFailingPeers", testInitiallyFailingPeers)
|
2021-08-09 20:58:06 +00:00
|
|
|
t.Run("TestTLSConnection", testTLSConnection)
|
2018-08-22 15:40:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func testJoinLeave(t *testing.T) {
|
2024-11-06 09:09:57 +00:00
|
|
|
logger := promslog.NewNopLogger()
|
2018-07-09 09:16:04 +00:00
|
|
|
p, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-03-02 14:45:21 +00:00
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p)
|
|
|
|
err = p.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
2018-03-02 14:45:21 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.False(t, p.Ready())
|
2021-03-09 13:25:34 +00:00
|
|
|
{
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
cancel()
|
|
|
|
require.Equal(t, context.Canceled, p.WaitReady(ctx))
|
|
|
|
}
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Equal(t, "settling", p.Status())
|
2018-04-10 08:30:12 +00:00
|
|
|
go p.Settle(context.Background(), 0*time.Second)
|
2021-03-09 13:25:34 +00:00
|
|
|
require.NoError(t, p.WaitReady(context.Background()))
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Equal(t, "ready", p.Status())
|
2018-06-05 12:28:49 +00:00
|
|
|
|
|
|
|
// Create the peer who joins the first.
|
2018-07-09 09:16:04 +00:00
|
|
|
p2, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-06-05 12:28:49 +00:00
|
|
|
"",
|
|
|
|
[]string{p.Self().Address()},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p2)
|
|
|
|
err = p2.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
go p2.Settle(context.Background(), 0*time.Second)
|
2021-08-04 14:13:51 +00:00
|
|
|
require.NoError(t, p2.WaitReady(context.Background()))
|
2018-06-05 12:28:49 +00:00
|
|
|
|
|
|
|
require.Equal(t, 2, p.ClusterSize())
|
|
|
|
p2.Leave(0 * time.Second)
|
|
|
|
require.Equal(t, 1, p.ClusterSize())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p.failedPeers, 1)
|
2018-06-05 12:28:49 +00:00
|
|
|
require.Equal(t, p2.Self().Address(), p.peers[p2.Self().Address()].Node.Address())
|
|
|
|
require.Equal(t, p2.Name(), p.failedPeers[0].Name)
|
|
|
|
}
|
|
|
|
|
2018-08-22 15:40:07 +00:00
|
|
|
func testReconnect(t *testing.T) {
|
2024-11-06 09:09:57 +00:00
|
|
|
logger := promslog.NewNopLogger()
|
2018-07-09 09:16:04 +00:00
|
|
|
p, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-06-05 12:28:49 +00:00
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p)
|
|
|
|
err = p.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
go p.Settle(context.Background(), 0*time.Second)
|
2021-03-09 13:25:34 +00:00
|
|
|
require.NoError(t, p.WaitReady(context.Background()))
|
2018-06-05 12:28:49 +00:00
|
|
|
|
2018-07-09 09:16:04 +00:00
|
|
|
p2, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-06-05 12:28:49 +00:00
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p2)
|
|
|
|
err = p2.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
go p2.Settle(context.Background(), 0*time.Second)
|
2021-03-09 13:25:34 +00:00
|
|
|
require.NoError(t, p2.WaitReady(context.Background()))
|
2018-06-05 12:28:49 +00:00
|
|
|
|
|
|
|
p.peerJoin(p2.Self())
|
|
|
|
p.peerLeave(p2.Self())
|
|
|
|
|
|
|
|
require.Equal(t, 1, p.ClusterSize())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p.failedPeers, 1)
|
2018-06-05 12:28:49 +00:00
|
|
|
|
|
|
|
p.reconnect()
|
|
|
|
|
|
|
|
require.Equal(t, 2, p.ClusterSize())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Empty(t, p.failedPeers)
|
2018-06-05 12:28:49 +00:00
|
|
|
require.Equal(t, StatusAlive, p.peers[p2.Self().Address()].status)
|
|
|
|
}
|
|
|
|
|
2018-08-22 15:40:07 +00:00
|
|
|
func testRemoveFailedPeers(t *testing.T) {
|
2024-11-06 09:09:57 +00:00
|
|
|
logger := promslog.NewNopLogger()
|
2018-07-09 09:16:04 +00:00
|
|
|
p, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-06-05 12:28:49 +00:00
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p)
|
|
|
|
err = p.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
n := p.Self()
|
|
|
|
|
|
|
|
now := time.Now()
|
|
|
|
p1 := peer{
|
|
|
|
status: StatusFailed,
|
|
|
|
leaveTime: now,
|
|
|
|
Node: n,
|
|
|
|
}
|
|
|
|
p2 := peer{
|
|
|
|
status: StatusFailed,
|
|
|
|
leaveTime: now.Add(-time.Hour),
|
|
|
|
Node: n,
|
|
|
|
}
|
|
|
|
p3 := peer{
|
|
|
|
status: StatusFailed,
|
|
|
|
leaveTime: now.Add(30 * -time.Minute),
|
|
|
|
Node: n,
|
|
|
|
}
|
|
|
|
p.failedPeers = []peer{p1, p2, p3}
|
|
|
|
|
|
|
|
p.removeFailedPeers(30 * time.Minute)
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p.failedPeers, 1)
|
2018-06-05 12:28:49 +00:00
|
|
|
require.Equal(t, p1, p.failedPeers[0])
|
|
|
|
}
|
|
|
|
|
2018-08-22 15:40:07 +00:00
|
|
|
func testInitiallyFailingPeers(t *testing.T) {
|
2024-11-06 09:09:57 +00:00
|
|
|
logger := promslog.NewNopLogger()
|
2018-06-08 10:34:52 +00:00
|
|
|
myAddr := "1.2.3.4:5000"
|
2018-06-15 10:34:50 +00:00
|
|
|
peerAddrs := []string{myAddr, "2.3.4.5:5000", "3.4.5.6:5000", "foo.example.com:5000"}
|
2018-07-09 09:16:04 +00:00
|
|
|
p, err := Create(
|
2018-06-05 12:28:49 +00:00
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
2019-11-21 13:17:24 +00:00
|
|
|
"127.0.0.1:0",
|
2018-06-05 12:28:49 +00:00
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
2021-08-09 20:58:06 +00:00
|
|
|
nil,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2018-07-09 09:16:04 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p)
|
|
|
|
err = p.Join(
|
2018-06-05 12:28:49 +00:00
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
2018-06-08 10:34:52 +00:00
|
|
|
p.setInitialFailed(peerAddrs, myAddr)
|
2018-06-05 12:28:49 +00:00
|
|
|
|
2018-06-15 10:34:50 +00:00
|
|
|
// We shouldn't have added "our" bind addr and the FQDN address to the
|
|
|
|
// failed peers list.
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p.failedPeers, len(peerAddrs)-2)
|
2018-06-05 12:28:49 +00:00
|
|
|
for _, addr := range peerAddrs {
|
2018-06-15 10:34:50 +00:00
|
|
|
if addr == myAddr || addr == "foo.example.com:5000" {
|
2018-06-08 10:34:52 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2018-06-05 12:28:49 +00:00
|
|
|
pr, ok := p.peers[addr]
|
|
|
|
require.True(t, ok)
|
2018-06-08 10:34:52 +00:00
|
|
|
require.Equal(t, StatusFailed.String(), pr.status.String())
|
|
|
|
require.Equal(t, addr, pr.Address())
|
|
|
|
expectedLen := len(p.failedPeers) - 1
|
|
|
|
p.peerJoin(pr.Node)
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p.failedPeers, expectedLen)
|
2018-06-05 12:28:49 +00:00
|
|
|
}
|
2018-03-02 14:45:21 +00:00
|
|
|
}
|
2021-08-09 20:58:06 +00:00
|
|
|
|
|
|
|
func testTLSConnection(t *testing.T) {
|
2024-11-06 09:09:57 +00:00
|
|
|
logger := promslog.NewNopLogger()
|
2021-08-09 20:58:06 +00:00
|
|
|
tlsTransportConfig1, err := GetTLSTransportConfig("./testdata/tls_config_node1.yml")
|
|
|
|
require.NoError(t, err)
|
|
|
|
p1, err := Create(
|
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
|
|
|
"127.0.0.1:0",
|
|
|
|
"",
|
|
|
|
[]string{},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2021-08-09 20:58:06 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
|
|
|
tlsTransportConfig1,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2021-08-09 20:58:06 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p1)
|
|
|
|
err = p1.Join(
|
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.False(t, p1.Ready())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Equal(t, "settling", p1.Status())
|
2021-08-09 20:58:06 +00:00
|
|
|
go p1.Settle(context.Background(), 0*time.Second)
|
|
|
|
p1.WaitReady(context.Background())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Equal(t, "ready", p1.Status())
|
2021-08-09 20:58:06 +00:00
|
|
|
|
|
|
|
// Create the peer who joins the first.
|
|
|
|
tlsTransportConfig2, err := GetTLSTransportConfig("./testdata/tls_config_node2.yml")
|
|
|
|
require.NoError(t, err)
|
|
|
|
p2, err := Create(
|
|
|
|
logger,
|
|
|
|
prometheus.NewRegistry(),
|
|
|
|
"127.0.0.1:0",
|
|
|
|
"",
|
|
|
|
[]string{p1.Self().Address()},
|
|
|
|
true,
|
|
|
|
DefaultPushPullInterval,
|
|
|
|
DefaultGossipInterval,
|
2022-03-25 16:59:51 +00:00
|
|
|
DefaultTCPTimeout,
|
2021-08-09 20:58:06 +00:00
|
|
|
DefaultProbeTimeout,
|
|
|
|
DefaultProbeInterval,
|
|
|
|
tlsTransportConfig2,
|
2021-11-10 16:40:48 +00:00
|
|
|
false,
|
2023-05-05 16:26:22 +00:00
|
|
|
"",
|
2021-08-09 20:58:06 +00:00
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, p2)
|
|
|
|
err = p2.Join(
|
|
|
|
DefaultReconnectInterval,
|
|
|
|
DefaultReconnectTimeout,
|
|
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
|
|
go p2.Settle(context.Background(), 0*time.Second)
|
2023-11-03 14:50:06 +00:00
|
|
|
p2.WaitReady(context.Background())
|
2024-02-14 11:18:28 +00:00
|
|
|
require.Equal(t, "ready", p2.Status())
|
|
|
|
|
2021-08-09 20:58:06 +00:00
|
|
|
require.Equal(t, 2, p1.ClusterSize())
|
|
|
|
p2.Leave(0 * time.Second)
|
|
|
|
require.Equal(t, 1, p1.ClusterSize())
|
2023-12-10 08:33:13 +00:00
|
|
|
require.Len(t, p1.failedPeers, 1)
|
2021-08-09 20:58:06 +00:00
|
|
|
require.Equal(t, p2.Self().Address(), p1.peers[p2.Self().Address()].Node.Address())
|
|
|
|
require.Equal(t, p2.Name(), p1.failedPeers[0].Name)
|
|
|
|
}
|