Add dropped alertmanagers to alertmanagers API (#3865)
This commit is contained in:
parent
93a63ac5fd
commit
1fd20fc954
|
@ -356,9 +356,6 @@ $ curl http://localhost:9090/api/v1/targets
|
|||
|
||||
## Alertmanagers
|
||||
|
||||
> This API is experimental as it is intended to be extended with Alertmanagers
|
||||
> dropped due to relabelling in the future.
|
||||
|
||||
The following endpoint returns an overview of the current state of the
|
||||
Prometheus alertmanager discovery:
|
||||
|
||||
|
@ -366,7 +363,7 @@ Prometheus alertmanager discovery:
|
|||
GET /api/v1/alertmanagers
|
||||
```
|
||||
|
||||
Currently only the active Alertmanagers are part of the response.
|
||||
Both the active and dropped Alertmanagers are part of the response.
|
||||
|
||||
```json
|
||||
$ curl http://localhost:9090/api/v1/alertmanagers
|
||||
|
@ -377,6 +374,11 @@ $ curl http://localhost:9090/api/v1/alertmanagers
|
|||
{
|
||||
"url": "http://127.0.0.1:9090/api/v1/alerts"
|
||||
}
|
||||
],
|
||||
"droppedAlertmanagers": [
|
||||
{
|
||||
"url": "http://127.0.0.1:9093/api/v1/alerts"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -419,6 +419,25 @@ func (n *Manager) Alertmanagers() []*url.URL {
|
|||
return res
|
||||
}
|
||||
|
||||
// DroppedAlertmanagers returns a slice of the URLs of Alertmanagers that were dropped during relabelling.
|
||||
func (n *Manager) DroppedAlertmanagers() []*url.URL {
|
||||
n.mtx.RLock()
|
||||
amSets := n.alertmanagers
|
||||
n.mtx.RUnlock()
|
||||
|
||||
var res []*url.URL
|
||||
|
||||
for _, ams := range amSets {
|
||||
ams.mtx.RLock()
|
||||
for _, dam := range ams.droppedAms {
|
||||
res = append(res, dam.url())
|
||||
}
|
||||
ams.mtx.RUnlock()
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
// sendAll sends the alerts to all configured Alertmanagers concurrently.
|
||||
// It returns true if the alerts could be sent successfully to at least one Alertmanager.
|
||||
func (n *Manager) sendAll(alerts ...*Alert) bool {
|
||||
|
@ -519,9 +538,10 @@ type alertmanagerSet struct {
|
|||
|
||||
metrics *alertMetrics
|
||||
|
||||
mtx sync.RWMutex
|
||||
ams []alertmanager
|
||||
logger log.Logger
|
||||
mtx sync.RWMutex
|
||||
ams []alertmanager
|
||||
droppedAms []alertmanager
|
||||
logger log.Logger
|
||||
}
|
||||
|
||||
func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*alertmanagerSet, error) {
|
||||
|
@ -540,24 +560,28 @@ func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger) (*ale
|
|||
// sync extracts a deduplicated set of Alertmanager endpoints from a list
|
||||
// of target group definitions.
|
||||
func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
|
||||
all := []alertmanager{}
|
||||
allAms := []alertmanager{}
|
||||
allDroppedAms := []alertmanager{}
|
||||
|
||||
for _, tg := range tgs {
|
||||
ams, err := alertmanagerFromGroup(tg, s.cfg)
|
||||
ams, droppedAms, err := alertmanagerFromGroup(tg, s.cfg)
|
||||
if err != nil {
|
||||
level.Error(s.logger).Log("msg", "Creating discovered Alertmanagers failed", "err", err)
|
||||
continue
|
||||
}
|
||||
all = append(all, ams...)
|
||||
allAms = append(allAms, ams...)
|
||||
allDroppedAms = append(allDroppedAms, droppedAms...)
|
||||
}
|
||||
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
// Set new Alertmanagers and deduplicate them along their unique URL.
|
||||
s.ams = []alertmanager{}
|
||||
s.droppedAms = []alertmanager{}
|
||||
s.droppedAms = append(s.droppedAms, allDroppedAms...)
|
||||
seen := map[string]struct{}{}
|
||||
|
||||
for _, am := range all {
|
||||
for _, am := range allAms {
|
||||
us := am.url().String()
|
||||
if _, ok := seen[us]; ok {
|
||||
continue
|
||||
|
@ -578,8 +602,9 @@ func postPath(pre string) string {
|
|||
|
||||
// alertmanagerFromGroup extracts a list of alertmanagers from a target group and an associated
|
||||
// AlertmanagerConfig.
|
||||
func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, error) {
|
||||
func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig) ([]alertmanager, []alertmanager, error) {
|
||||
var res []alertmanager
|
||||
var droppedAlertManagers []alertmanager
|
||||
|
||||
for _, tlset := range tg.Targets {
|
||||
lbls := make([]labels.Label, 0, len(tlset)+2+len(tg.Labels))
|
||||
|
@ -600,6 +625,7 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig
|
|||
|
||||
lset := relabel.Process(labels.New(lbls...), cfg.RelabelConfigs...)
|
||||
if lset == nil {
|
||||
droppedAlertManagers = append(droppedAlertManagers, alertmanagerLabels{lbls})
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -627,13 +653,13 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig
|
|||
case "https":
|
||||
addr = addr + ":443"
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid scheme: %q", cfg.Scheme)
|
||||
return nil, nil, fmt.Errorf("invalid scheme: %q", cfg.Scheme)
|
||||
}
|
||||
lb.Set(model.AddressLabel, addr)
|
||||
}
|
||||
|
||||
if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil {
|
||||
return nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Meta labels are deleted after relabelling. Other internal labels propagate to
|
||||
|
@ -646,5 +672,5 @@ func alertmanagerFromGroup(tg *targetgroup.Group, cfg *config.AlertmanagerConfig
|
|||
|
||||
res = append(res, alertmanagerLabels{lset})
|
||||
}
|
||||
return res, nil
|
||||
return res, droppedAlertManagers, nil
|
||||
}
|
||||
|
|
|
@ -442,7 +442,7 @@ func (a alertmanagerMock) url() *url.URL {
|
|||
|
||||
func TestLabelSetNotReused(t *testing.T) {
|
||||
tg := makeInputTargetGroup()
|
||||
_, err := alertmanagerFromGroup(tg, &config.AlertmanagerConfig{})
|
||||
_, _, err := alertmanagerFromGroup(tg, &config.AlertmanagerConfig{})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -503,6 +503,61 @@ alerting:
|
|||
|
||||
}
|
||||
|
||||
func TestDroppedAlertmanagers(t *testing.T) {
|
||||
var tests = []struct {
|
||||
in *targetgroup.Group
|
||||
out string
|
||||
}{
|
||||
{
|
||||
in: &targetgroup.Group{
|
||||
Targets: []model.LabelSet{
|
||||
{
|
||||
"__address__": "alertmanager:9093",
|
||||
},
|
||||
},
|
||||
},
|
||||
out: "http://alertmanager:9093/api/v1/alerts",
|
||||
},
|
||||
}
|
||||
|
||||
n := NewManager(&Options{}, nil)
|
||||
|
||||
cfg := &config.Config{}
|
||||
s := `
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
relabel_configs:
|
||||
- source_labels: ['__address__']
|
||||
regex: 'alertmanager:9093'
|
||||
action: drop
|
||||
`
|
||||
if err := yaml.Unmarshal([]byte(s), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config: %s", err)
|
||||
}
|
||||
|
||||
if err := n.ApplyConfig(cfg); err != nil {
|
||||
t.Fatalf("Error Applying the config:%v", err)
|
||||
}
|
||||
|
||||
tgs := make(map[string][]*targetgroup.Group)
|
||||
for _, tt := range tests {
|
||||
|
||||
b, err := json.Marshal(cfg.AlertingConfig.AlertmanagerConfigs[0])
|
||||
if err != nil {
|
||||
t.Fatalf("Error creating config hash:%v", err)
|
||||
}
|
||||
tgs[fmt.Sprintf("%x", md5.Sum(b))] = []*targetgroup.Group{
|
||||
tt.in,
|
||||
}
|
||||
n.reload(tgs)
|
||||
res := n.DroppedAlertmanagers()[0].String()
|
||||
|
||||
testutil.Equals(t, res, tt.out)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func makeInputTargetGroup() *targetgroup.Group {
|
||||
return &targetgroup.Group{
|
||||
Targets: []model.LabelSet{
|
||||
|
|
|
@ -87,6 +87,7 @@ type targetRetriever interface {
|
|||
|
||||
type alertmanagerRetriever interface {
|
||||
Alertmanagers() []*url.URL
|
||||
DroppedAlertmanagers() []*url.URL
|
||||
}
|
||||
|
||||
type response struct {
|
||||
|
@ -468,7 +469,8 @@ func (api *API) targets(r *http.Request) (interface{}, *apiError) {
|
|||
|
||||
// AlertmanagerDiscovery has all the active and dropped Alertmanagers.
|
||||
type AlertmanagerDiscovery struct {
|
||||
ActiveAlertmanagers []*AlertmanagerTarget `json:"activeAlertmanagers"`
|
||||
ActiveAlertmanagers []*AlertmanagerTarget `json:"activeAlertmanagers"`
|
||||
DroppedAlertmanagers []*AlertmanagerTarget `json:"droppedAlertmanagers"`
|
||||
}
|
||||
|
||||
// AlertmanagerTarget has info on one AM.
|
||||
|
@ -478,12 +480,14 @@ type AlertmanagerTarget struct {
|
|||
|
||||
func (api *API) alertmanagers(r *http.Request) (interface{}, *apiError) {
|
||||
urls := api.alertmanagerRetriever.Alertmanagers()
|
||||
ams := &AlertmanagerDiscovery{ActiveAlertmanagers: make([]*AlertmanagerTarget, len(urls))}
|
||||
|
||||
droppedURLS := api.alertmanagerRetriever.DroppedAlertmanagers()
|
||||
ams := &AlertmanagerDiscovery{ActiveAlertmanagers: make([]*AlertmanagerTarget, len(urls)), DroppedAlertmanagers: make([]*AlertmanagerTarget, len(droppedURLS))}
|
||||
for i, url := range urls {
|
||||
ams.ActiveAlertmanagers[i] = &AlertmanagerTarget{URL: url.String()}
|
||||
}
|
||||
|
||||
for i, url := range droppedURLS {
|
||||
ams.DroppedAlertmanagers[i] = &AlertmanagerTarget{URL: url.String()}
|
||||
}
|
||||
return ams, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -48,10 +48,26 @@ func (f targetRetrieverFunc) Targets() []*scrape.Target {
|
|||
return f()
|
||||
}
|
||||
|
||||
type alertmanagerRetrieverFunc func() []*url.URL
|
||||
type testAlertmanagerRetriever struct{}
|
||||
|
||||
func (f alertmanagerRetrieverFunc) Alertmanagers() []*url.URL {
|
||||
return f()
|
||||
func (t testAlertmanagerRetriever) Alertmanagers() []*url.URL {
|
||||
return []*url.URL{
|
||||
{
|
||||
Scheme: "http",
|
||||
Host: "alertmanager.example.com:8080",
|
||||
Path: "/api/v1/alerts",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (t testAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL {
|
||||
return []*url.URL{
|
||||
{
|
||||
Scheme: "http",
|
||||
Host: "dropped.alertmanager.example.com:8080",
|
||||
Path: "/api/v1/alerts",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var samplePrometheusCfg = config.Config{
|
||||
|
@ -100,13 +116,7 @@ func TestEndpoints(t *testing.T) {
|
|||
}
|
||||
})
|
||||
|
||||
ar := alertmanagerRetrieverFunc(func() []*url.URL {
|
||||
return []*url.URL{{
|
||||
Scheme: "http",
|
||||
Host: "alertmanager.example.com:8080",
|
||||
Path: "/api/v1/alerts",
|
||||
}}
|
||||
})
|
||||
var ar testAlertmanagerRetriever
|
||||
|
||||
api := &API{
|
||||
Queryable: suite.Storage(),
|
||||
|
@ -447,6 +457,11 @@ func TestEndpoints(t *testing.T) {
|
|||
URL: "http://alertmanager.example.com:8080/api/v1/alerts",
|
||||
},
|
||||
},
|
||||
DroppedAlertmanagers: []*AlertmanagerTarget{
|
||||
{
|
||||
URL: "http://dropped.alertmanager.example.com:8080/api/v1/alerts",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue