cluster/api: improve metrics and cluster status

This commit is contained in:
Fabian Reinartz 2018-02-09 11:16:00 +01:00
parent 247bfff606
commit 3f2e00fbea
2 changed files with 41 additions and 47 deletions

View File

@ -189,11 +189,11 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
api.mtx.RLock()
var status = struct {
ConfigYAML string `json:"configYAML"`
ConfigJSON *config.Config `json:"configJSON"`
VersionInfo map[string]string `json:"versionInfo"`
Uptime time.Time `json:"uptime"`
MeshStatus *meshStatus `json:"meshStatus"`
ConfigYAML string `json:"configYAML"`
ConfigJSON *config.Config `json:"configJSON"`
VersionInfo map[string]string `json:"versionInfo"`
Uptime time.Time `json:"uptime"`
ClusterStatus *clusterStatus `json:"clusterStatus"`
}{
ConfigYAML: api.config.String(),
ConfigJSON: api.config,
@ -205,8 +205,8 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
"buildDate": version.BuildDate,
"goVersion": version.GoVersion,
},
Uptime: api.uptime,
MeshStatus: getMeshStatus(api),
Uptime: api.uptime,
ClusterStatus: getClusterStatus(api.peer),
}
api.mtx.RUnlock()
@ -214,45 +214,29 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
api.respond(w, status)
}
// meshStatus is the payload serialized under the "meshStatus" key of the
// status API response. It is assembled by getMeshStatus from the API's peer.
// NOTE(review): the visible getMeshStatus code only fills Name, NickName and
// Peers; Connections appears to be left unset — confirm before relying on it.
type meshStatus struct {
Name string `json:"name"`
NickName string `json:"nickName"`
Peers []peerStatus `json:"peers"`
Connections []connectionStatus `json:"connections"`
}
type peerStatus struct {
Name string `json:"name"` // e.g. "00:00:00:00:00:01"
NickName string `json:"nickName"` // e.g. "a"
UID uint64 `json:"uid"` // e.g. "14015114173033265000"
Name string `json:"name"`
Address string `json:"address"`
}
type connectionStatus struct {
Address string `json:"address"`
Outbound bool `json:"outbound"`
State string `json:"state"`
Info string `json:"info"`
type clusterStatus struct {
Name string `json:"name"`
Peers []peerStatus `json:"peers"`
}
func getMeshStatus(api *API) *meshStatus {
if api.peer == nil {
func getClusterStatus(p *cluster.Peer) *clusterStatus {
if p == nil {
return nil
}
s := &clusterStatus{Name: p.Name()}
strippedStatus := &meshStatus{
Name: api.peer.Name(),
NickName: "",
}
for _, p := range api.peer.Peers() {
strippedStatus.Peers = append(strippedStatus.Peers, peerStatus{
Name: p.Name,
NickName: "",
UID: 0,
for _, n := range p.Peers() {
s.Peers = append(s.Peers, peerStatus{
Name: n.Name,
Address: n.Address(),
})
}
return strippedStatus
return s
}
func (api *API) alertGroups(w http.ResponseWriter, r *http.Request) {

View File

@ -228,7 +228,8 @@ type delegate struct {
logger log.Logger
bcast *memberlist.TransmitLimitedQueue
gossipMsgsReceived prometheus.Counter
messagesReceived *prometheus.CounterVec
messagesReceivedSize *prometheus.CounterVec
}
func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
@ -236,10 +237,14 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
NumNodes: p.ClusterSize,
RetransmitMult: 3,
}
gossipMsgsReceived := prometheus.NewCounter(prometheus.CounterOpts{
Name: "alertmanager_gossip_messages_received_total",
Help: "Total gossip NotifyMsg calls.",
})
// messagesReceived counts cluster messages received, partitioned by the
// "msg_type" label. The Help text is exposed verbatim on the metrics
// endpoint, so it is spelled correctly here ("messages", not "messsages").
messagesReceived := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "alertmanager_cluster_messages_received_total",
Help: "Total number of cluster messages received.",
}, []string{"msg_type"})
messagesReceivedSize := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "alertmanager_cluster_messages_received_size_total",
Help: "Total size of cluster messages received.",
}, []string{"msg_type"})
gossipClusterMembers := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "alertmanager_cluster_members",
Help: "Number indicating current number of members in cluster.",
@ -247,13 +252,14 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
return float64(p.ClusterSize())
})
reg.MustRegister(gossipMsgsReceived, gossipClusterMembers)
reg.MustRegister(messagesReceived, messagesReceivedSize, gossipClusterMembers)
return &delegate{
logger: l,
Peer: p,
bcast: bcast,
gossipMsgsReceived: gossipMsgsReceived,
logger: l,
Peer: p,
bcast: bcast,
messagesReceived: messagesReceived,
messagesReceivedSize: messagesReceivedSize,
}
}
@ -264,7 +270,8 @@ func (d *delegate) NodeMeta(limit int) []byte {
// NotifyMsg is the callback invoked when a user-level gossip message is received.
func (d *delegate) NotifyMsg(b []byte) {
d.gossipMsgsReceived.Inc()
d.messagesReceived.WithLabelValues("update").Inc()
d.messagesReceivedSize.WithLabelValues("update").Add(float64(len(b)))
var p clusterpb.Part
if err := proto.Unmarshal(b, &p); err != nil {
@ -308,6 +315,9 @@ func (d *delegate) LocalState(_ bool) []byte {
}
func (d *delegate) MergeRemoteState(buf []byte, _ bool) {
d.messagesReceived.WithLabelValues("full_state").Inc()
d.messagesReceivedSize.WithLabelValues("full_state").Add(float64(len(buf)))
var fs clusterpb.FullState
if err := proto.Unmarshal(buf, &fs); err != nil {
level.Warn(d.logger).Log("msg", "merge remote state", "err", err)