diff --git a/cluster/advertise.go b/cluster/advertise.go index 8eca5463..8f0f19dd 100644 --- a/cluster/advertise.go +++ b/cluster/advertise.go @@ -20,10 +20,11 @@ import ( "github.com/pkg/errors" ) -type getPrivateIPFunc func() (string, error) +type getIPFunc func() (string, error) -// This is overridden in unit tests to mock the sockaddr.GetPrivateIP function. -var getPrivateAddress getPrivateIPFunc = sockaddr.GetPrivateIP +// These are overridden in unit tests to mock the sockaddr functions. +var getPrivateAddress getIPFunc = sockaddr.GetPrivateIP +var getPublicAddress getIPFunc = sockaddr.GetPublicIP // calculateAdvertiseAddress attempts to clone logic from deep within memberlist // (NetTransport.FinalAdvertiseAddr) in order to surface its conclusions to the @@ -31,7 +32,7 @@ var getPrivateAddress getPrivateIPFunc = sockaddr.GetPrivateIP // inadvertently misconfigured their cluster. // // https://github.com/hashicorp/memberlist/blob/022f081/net_transport.go#L126 -func calculateAdvertiseAddress(bindAddr, advertiseAddr string) (net.IP, error) { +func calculateAdvertiseAddress(bindAddr, advertiseAddr string, allowInsecureAdvertise bool) (net.IP, error) { if advertiseAddr != "" { ip := net.ParseIP(advertiseAddr) if ip == nil { @@ -44,18 +45,7 @@ func calculateAdvertiseAddress(bindAddr, advertiseAddr string) (net.IP, error) { } if isAny(bindAddr) { - privateIP, err := getPrivateAddress() - if err != nil { - return nil, errors.Wrap(err, "failed to get private IP") - } - if privateIP == "" { - return nil, errors.New("no private IP found, explicit advertise addr not provided") - } - ip := net.ParseIP(privateIP) - if ip == nil { - return nil, errors.Errorf("failed to parse private IP '%s'", privateIP) - } - return ip, nil + return discoverAdvertiseAddress(allowInsecureAdvertise) } ip := net.ParseIP(bindAddr) @@ -64,3 +54,33 @@ func calculateAdvertiseAddress(bindAddr, advertiseAddr string) (net.IP, error) { } return ip, nil } + +// discoverAdvertiseAddress will attempt to 
get a single IP address to use as +// the advertise address when one is not explicitly provided. It defaults to +// using a private IP address and falls back to a public IP address only when +// none is found and insecure advertising is allowed. +func discoverAdvertiseAddress(allowInsecureAdvertise bool) (net.IP, error) { + addr, err := getPrivateAddress() + if err != nil { + return nil, errors.Wrap(err, "failed to get private IP") + } + if addr == "" && !allowInsecureAdvertise { + return nil, errors.New("no private IP found, explicit advertise addr not provided") + } + + if addr == "" { + addr, err = getPublicAddress() + if err != nil { + return nil, errors.Wrap(err, "failed to get public IP") + } + if addr == "" { + return nil, errors.New("no private/public IP found, explicit advertise addr not provided") + } + } + + ip := net.ParseIP(addr) + if ip == nil { + return nil, errors.Errorf("failed to parse discovered IP '%s'", addr) + } + return ip, nil +} diff --git a/cluster/advertise_test.go b/cluster/advertise_test.go index dc6b02e8..4f0b6e0d 100644 --- a/cluster/advertise_test.go +++ b/cluster/advertise_test.go @@ -28,13 +28,17 @@ func TestCalculateAdvertiseAddress(t *testing.T) { }() cases := []struct { - fn getPrivateIPFunc - bind, advertise string + name string + privateIPFn getIPFunc + publicIPFn getIPFunc + bind, advertise string + allowInsecureAdvertise bool expectedIP net.IP err bool }{ { + name: "use provided bind address", bind: "192.0.2.1", advertise: "", @@ -42,6 +46,7 @@ func TestCalculateAdvertiseAddress(t *testing.T) { err: false, }, { + name: "use provided advertise address", bind: "192.0.2.1", advertise: "192.0.2.2", @@ -49,44 +54,93 @@ func TestCalculateAdvertiseAddress(t *testing.T) { err: false, }, { - fn: func() (string, error) { return "192.0.2.1", nil }, - bind: "0.0.0.0", - advertise: "", + name: "discover private ip address", + privateIPFn: func() (string, error) { return "192.0.2.1", nil }, + bind: "0.0.0.0", + advertise: "", expectedIP: 
net.ParseIP("192.0.2.1"), err: false, }, { - fn: func() (string, error) { return "", errors.New("some error") }, - bind: "0.0.0.0", - advertise: "", + name: "error if getPrivateAddress errors", + privateIPFn: func() (string, error) { return "", errors.New("some error") }, + bind: "0.0.0.0", + advertise: "", err: true, }, { - fn: func() (string, error) { return "invalid", nil }, - bind: "0.0.0.0", - advertise: "", + name: "error if getPrivateAddress returns an invalid address", + privateIPFn: func() (string, error) { return "invalid", nil }, + bind: "0.0.0.0", + advertise: "", err: true, }, { - fn: func() (string, error) { return "", nil }, - bind: "0.0.0.0", - advertise: "", + name: "error if getPrivateAddress returns an empty address", + privateIPFn: func() (string, error) { return "", nil }, + bind: "0.0.0.0", + advertise: "", + + err: true, + }, + + { + name: "discover public advertise address", + privateIPFn: func() (string, error) { return "", nil }, + publicIPFn: func() (string, error) { return "192.0.2.1", nil }, + bind: "0.0.0.0", + advertise: "", + allowInsecureAdvertise: true, + + expectedIP: net.ParseIP("192.0.2.1"), + err: false, + }, + { + name: "error if getPublicAddress errors", + privateIPFn: func() (string, error) { return "", nil }, + publicIPFn: func() (string, error) { return "", errors.New("some error") }, + bind: "0.0.0.0", + advertise: "", + allowInsecureAdvertise: true, + + err: true, + }, + { + name: "error if getPublicAddress returns an invalid address", + privateIPFn: func() (string, error) { return "", nil }, + publicIPFn: func() (string, error) { return "invalid", nil }, + bind: "0.0.0.0", + advertise: "", + allowInsecureAdvertise: true, + + err: true, + }, + { + name: "error if getPublicAddress returns an empty address", + privateIPFn: func() (string, error) { return "", nil }, + publicIPFn: func() (string, error) { return "", nil }, + bind: "0.0.0.0", + advertise: "", + allowInsecureAdvertise: true, err: true, }, } for _, c := range 
cases { - getPrivateAddress = c.fn - got, err := calculateAdvertiseAddress(c.bind, c.advertise) - if c.err { - require.Error(t, err) - } else { + t.Run(c.name, func(t *testing.T) { + getPrivateAddress = c.privateIPFn + getPublicAddress = c.publicIPFn + got, err := calculateAdvertiseAddress(c.bind, c.advertise, c.allowInsecureAdvertise) + if c.err { + require.Error(t, err) + return + } require.NoError(t, err) require.Equal(t, c.expectedIP.String(), got.String()) - } + }) } } diff --git a/cluster/cluster.go b/cluster/cluster.go index 9214bc62..6de11c53 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -141,6 +141,7 @@ func Create( probeTimeout time.Duration, probeInterval time.Duration, tlsTransportConfig *TLSTransportConfig, + allowInsecureAdvertise bool, ) (*Peer, error) { bindHost, bindPortStr, err := net.SplitHostPort(bindAddr) if err != nil { @@ -172,7 +173,7 @@ func Create( level.Debug(l).Log("msg", "resolved peers to following addresses", "peers", strings.Join(resolvedPeers, ",")) // Initial validation of user-specified advertise address. 
- addr, err := calculateAdvertiseAddress(bindHost, advertiseHost) + addr, err := calculateAdvertiseAddress(bindHost, advertiseHost, allowInsecureAdvertise) if err != nil { level.Warn(l).Log("err", "couldn't deduce an advertise address: "+err.Error()) } else if hasNonlocal(resolvedPeers) && isUnroutable(addr.String()) { diff --git a/cluster/cluster_test.go b/cluster/cluster_test.go index cbbe1d10..56269896 100644 --- a/cluster/cluster_test.go +++ b/cluster/cluster_test.go @@ -53,6 +53,7 @@ func testJoinLeave(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p) @@ -86,6 +87,7 @@ func testJoinLeave(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p2) @@ -120,6 +122,7 @@ func testReconnect(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p) @@ -144,6 +147,7 @@ func testReconnect(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p2) @@ -183,6 +187,7 @@ func testRemoveFailedPeers(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p) @@ -233,6 +238,7 @@ func testInitiallyFailingPeers(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, nil, + false, ) require.NoError(t, err) require.NotNil(t, p) @@ -279,6 +285,7 @@ func testTLSConnection(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, tlsTransportConfig1, + false, ) require.NoError(t, err) require.NotNil(t, p1) @@ -309,6 +316,7 @@ func testTLSConnection(t *testing.T) { DefaultProbeTimeout, DefaultProbeInterval, tlsTransportConfig2, + false, ) require.NoError(t, err) require.NotNil(t, p2) diff --git a/cmd/alertmanager/main.go b/cmd/alertmanager/main.go index cf2e3b6d..87a0116d 100644 --- a/cmd/alertmanager/main.go +++ b/cmd/alertmanager/main.go @@ -200,18 +200,19 @@ func run() 
int { clusterBindAddr = kingpin.Flag("cluster.listen-address", "Listen address for cluster. Set to empty string to disable HA mode."). Default(defaultClusterAddr).String() - clusterAdvertiseAddr = kingpin.Flag("cluster.advertise-address", "Explicit address to advertise in cluster.").String() - peers = kingpin.Flag("cluster.peer", "Initial peers (may be repeated).").Strings() - peerTimeout = kingpin.Flag("cluster.peer-timeout", "Time to wait between peers to send notifications.").Default("15s").Duration() - gossipInterval = kingpin.Flag("cluster.gossip-interval", "Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth.").Default(cluster.DefaultGossipInterval.String()).Duration() - pushPullInterval = kingpin.Flag("cluster.pushpull-interval", "Interval for gossip state syncs. Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage.").Default(cluster.DefaultPushPullInterval.String()).Duration() - tcpTimeout = kingpin.Flag("cluster.tcp-timeout", "Timeout for establishing a stream connection with a remote node for a full state sync, and for stream read and write operations.").Default(cluster.DefaultTcpTimeout.String()).Duration() - probeTimeout = kingpin.Flag("cluster.probe-timeout", "Timeout to wait for an ack from a probed node before assuming it is unhealthy. This should be set to 99-percentile of RTT (round-trip time) on your network.").Default(cluster.DefaultProbeTimeout.String()).Duration() - probeInterval = kingpin.Flag("cluster.probe-interval", "Interval between random node probes. 
Setting this lower (more frequent) will cause the cluster to detect failed nodes more quickly at the expense of increased bandwidth usage.").Default(cluster.DefaultProbeInterval.String()).Duration() - settleTimeout = kingpin.Flag("cluster.settle-timeout", "Maximum time to wait for cluster connections to settle before evaluating notifications.").Default(cluster.DefaultPushPullInterval.String()).Duration() - reconnectInterval = kingpin.Flag("cluster.reconnect-interval", "Interval between attempting to reconnect to lost peers.").Default(cluster.DefaultReconnectInterval.String()).Duration() - peerReconnectTimeout = kingpin.Flag("cluster.reconnect-timeout", "Length of time to attempt to reconnect to a lost peer.").Default(cluster.DefaultReconnectTimeout.String()).Duration() - tlsConfigFile = kingpin.Flag("cluster.tls-config", "[EXPERIMENTAL] Path to config yaml file that can enable mutual TLS within the gossip protocol.").Default("").String() + clusterAdvertiseAddr = kingpin.Flag("cluster.advertise-address", "Explicit address to advertise in cluster.").String() + peers = kingpin.Flag("cluster.peer", "Initial peers (may be repeated).").Strings() + peerTimeout = kingpin.Flag("cluster.peer-timeout", "Time to wait between peers to send notifications.").Default("15s").Duration() + gossipInterval = kingpin.Flag("cluster.gossip-interval", "Interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated across the cluster more quickly at the expense of increased bandwidth.").Default(cluster.DefaultGossipInterval.String()).Duration() + pushPullInterval = kingpin.Flag("cluster.pushpull-interval", "Interval for gossip state syncs. 
Setting this interval lower (more frequent) will increase convergence speeds across larger clusters at the expense of increased bandwidth usage.").Default(cluster.DefaultPushPullInterval.String()).Duration() + tcpTimeout = kingpin.Flag("cluster.tcp-timeout", "Timeout for establishing a stream connection with a remote node for a full state sync, and for stream read and write operations.").Default(cluster.DefaultTcpTimeout.String()).Duration() + probeTimeout = kingpin.Flag("cluster.probe-timeout", "Timeout to wait for an ack from a probed node before assuming it is unhealthy. This should be set to 99-percentile of RTT (round-trip time) on your network.").Default(cluster.DefaultProbeTimeout.String()).Duration() + probeInterval = kingpin.Flag("cluster.probe-interval", "Interval between random node probes. Setting this lower (more frequent) will cause the cluster to detect failed nodes more quickly at the expense of increased bandwidth usage.").Default(cluster.DefaultProbeInterval.String()).Duration() + settleTimeout = kingpin.Flag("cluster.settle-timeout", "Maximum time to wait for cluster connections to settle before evaluating notifications.").Default(cluster.DefaultPushPullInterval.String()).Duration() + reconnectInterval = kingpin.Flag("cluster.reconnect-interval", "Interval between attempting to reconnect to lost peers.").Default(cluster.DefaultReconnectInterval.String()).Duration() + peerReconnectTimeout = kingpin.Flag("cluster.reconnect-timeout", "Length of time to attempt to reconnect to a lost peer.").Default(cluster.DefaultReconnectTimeout.String()).Duration() + tlsConfigFile = kingpin.Flag("cluster.tls-config", "[EXPERIMENTAL] Path to config yaml file that can enable mutual TLS within the gossip protocol.").Default("").String() + allowInsecureAdvertise = kingpin.Flag("cluster.allow-insecure-public-advertise-address-discovery", "[EXPERIMENTAL] Allow alertmanager to discover and advertise a public IP address when no private IP address is found.").Bool() ) 
promlogflag.AddFlags(kingpin.CommandLine, &promlogConfig) @@ -252,6 +253,7 @@ func run() int { *probeTimeout, *probeInterval, tlsTransportConfig, + *allowInsecureAdvertise, ) if err != nil { level.Error(logger).Log("msg", "unable to initialize gossip mesh", "err", err)