cluster/api: improve metrics and cluster status
This commit is contained in:
parent
247bfff606
commit
3f2e00fbea
56
api/api.go
56
api/api.go
|
@ -189,11 +189,11 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
|
|||
api.mtx.RLock()
|
||||
|
||||
var status = struct {
|
||||
ConfigYAML string `json:"configYAML"`
|
||||
ConfigJSON *config.Config `json:"configJSON"`
|
||||
VersionInfo map[string]string `json:"versionInfo"`
|
||||
Uptime time.Time `json:"uptime"`
|
||||
MeshStatus *meshStatus `json:"meshStatus"`
|
||||
ConfigYAML string `json:"configYAML"`
|
||||
ConfigJSON *config.Config `json:"configJSON"`
|
||||
VersionInfo map[string]string `json:"versionInfo"`
|
||||
Uptime time.Time `json:"uptime"`
|
||||
ClusterStatus *clusterStatus `json:"clusterStatus"`
|
||||
}{
|
||||
ConfigYAML: api.config.String(),
|
||||
ConfigJSON: api.config,
|
||||
|
@ -205,8 +205,8 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
|
|||
"buildDate": version.BuildDate,
|
||||
"goVersion": version.GoVersion,
|
||||
},
|
||||
Uptime: api.uptime,
|
||||
MeshStatus: getMeshStatus(api),
|
||||
Uptime: api.uptime,
|
||||
ClusterStatus: getClusterStatus(api.peer),
|
||||
}
|
||||
|
||||
api.mtx.RUnlock()
|
||||
|
@ -214,45 +214,29 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
|
|||
api.respond(w, status)
|
||||
}
|
||||
|
||||
type meshStatus struct {
|
||||
Name string `json:"name"`
|
||||
NickName string `json:"nickName"`
|
||||
Peers []peerStatus `json:"peers"`
|
||||
Connections []connectionStatus `json:"connections"`
|
||||
}
|
||||
|
||||
type peerStatus struct {
|
||||
Name string `json:"name"` // e.g. "00:00:00:00:00:01"
|
||||
NickName string `json:"nickName"` // e.g. "a"
|
||||
UID uint64 `json:"uid"` // e.g. "14015114173033265000"
|
||||
Name string `json:"name"`
|
||||
Address string `json:"address"`
|
||||
}
|
||||
|
||||
type connectionStatus struct {
|
||||
Address string `json:"address"`
|
||||
Outbound bool `json:"outbound"`
|
||||
State string `json:"state"`
|
||||
Info string `json:"info"`
|
||||
type clusterStatus struct {
|
||||
Name string `json:"name"`
|
||||
Peers []peerStatus `json:"peers"`
|
||||
}
|
||||
|
||||
func getMeshStatus(api *API) *meshStatus {
|
||||
if api.peer == nil {
|
||||
func getClusterStatus(p *cluster.Peer) *clusterStatus {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
s := &clusterStatus{Name: p.Name()}
|
||||
|
||||
strippedStatus := &meshStatus{
|
||||
Name: api.peer.Name(),
|
||||
NickName: "",
|
||||
}
|
||||
|
||||
for _, p := range api.peer.Peers() {
|
||||
strippedStatus.Peers = append(strippedStatus.Peers, peerStatus{
|
||||
Name: p.Name,
|
||||
NickName: "",
|
||||
UID: 0,
|
||||
for _, n := range p.Peers() {
|
||||
s.Peers = append(s.Peers, peerStatus{
|
||||
Name: n.Name,
|
||||
Address: n.Address(),
|
||||
})
|
||||
}
|
||||
|
||||
return strippedStatus
|
||||
return s
|
||||
}
|
||||
|
||||
func (api *API) alertGroups(w http.ResponseWriter, r *http.Request) {
|
||||
|
|
|
@ -228,7 +228,8 @@ type delegate struct {
|
|||
logger log.Logger
|
||||
bcast *memberlist.TransmitLimitedQueue
|
||||
|
||||
gossipMsgsReceived prometheus.Counter
|
||||
messagesReceived *prometheus.CounterVec
|
||||
messagesReceivedSize *prometheus.CounterVec
|
||||
}
|
||||
|
||||
func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
|
||||
|
@ -236,10 +237,14 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
|
|||
NumNodes: p.ClusterSize,
|
||||
RetransmitMult: 3,
|
||||
}
|
||||
gossipMsgsReceived := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "alertmanager_gossip_messages_received_total",
|
||||
Help: "Total gossip NotifyMsg calls.",
|
||||
})
|
||||
messagesReceived := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "alertmanager_cluster_messages_received_total",
|
||||
Help: "Total number of cluster messages received.",
|
||||
}, []string{"msg_type"})
|
||||
messagesReceivedSize := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "alertmanager_cluster_messages_received_size_total",
|
||||
Help: "Total size of cluster messages received.",
|
||||
}, []string{"msg_type"})
|
||||
gossipClusterMembers := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_cluster_members",
|
||||
Help: "Number indicating current number of members in cluster.",
|
||||
|
@ -247,13 +252,14 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
|
|||
return float64(p.ClusterSize())
|
||||
})
|
||||
|
||||
reg.MustRegister(gossipMsgsReceived, gossipClusterMembers)
|
||||
reg.MustRegister(messagesReceived, messagesReceivedSize, gossipClusterMembers)
|
||||
|
||||
return &delegate{
|
||||
logger: l,
|
||||
Peer: p,
|
||||
bcast: bcast,
|
||||
gossipMsgsReceived: gossipMsgsReceived,
|
||||
logger: l,
|
||||
Peer: p,
|
||||
bcast: bcast,
|
||||
messagesReceived: messagesReceived,
|
||||
messagesReceivedSize: messagesReceivedSize,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -264,7 +270,8 @@ func (d *delegate) NodeMeta(limit int) []byte {
|
|||
|
||||
// NotifyMsg is the callback invoked when a user-level gossip message is received.
|
||||
func (d *delegate) NotifyMsg(b []byte) {
|
||||
d.gossipMsgsReceived.Inc()
|
||||
d.messagesReceived.WithLabelValues("update").Inc()
|
||||
d.messagesReceivedSize.WithLabelValues("update").Add(float64(len(b)))
|
||||
|
||||
var p clusterpb.Part
|
||||
if err := proto.Unmarshal(b, &p); err != nil {
|
||||
|
@ -308,6 +315,9 @@ func (d *delegate) LocalState(_ bool) []byte {
|
|||
}
|
||||
|
||||
func (d *delegate) MergeRemoteState(buf []byte, _ bool) {
|
||||
d.messagesReceived.WithLabelValues("full_state").Inc()
|
||||
d.messagesReceivedSize.WithLabelValues("full_state").Add(float64(len(buf)))
|
||||
|
||||
var fs clusterpb.FullState
|
||||
if err := proto.Unmarshal(buf, &fs); err != nil {
|
||||
level.Warn(d.logger).Log("msg", "merge remote state", "err", err)
|
||||
|
|
Loading…
Reference in New Issue