alertmanager/api.go

440 lines
9.8 KiB
Go
Raw Normal View History

2015-10-11 15:24:49 +00:00
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2015-09-25 16:14:46 +00:00
package main
2015-07-01 11:17:08 +00:00
import (
"encoding/json"
"fmt"
"net/http"
2015-09-27 12:07:04 +00:00
"strconv"
"sync"
2015-09-26 09:12:59 +00:00
"time"
2015-07-01 11:17:08 +00:00
2016-01-09 12:16:00 +00:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
2015-10-16 12:02:22 +00:00
"github.com/prometheus/common/model"
2015-07-01 11:17:08 +00:00
"github.com/prometheus/common/route"
"github.com/prometheus/common/version"
2015-07-01 11:17:08 +00:00
"golang.org/x/net/context"
2015-09-26 09:12:59 +00:00
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/types"
2015-07-01 11:17:08 +00:00
)
var (
numReceivedAlerts = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "alerts_received_total",
Help: "The total number of received alerts.",
}, []string{"status"})
numInvalidAlerts = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "alertmanager",
2016-01-09 12:49:25 +00:00
Name: "alerts_invalid_total",
Help: "The total number of received alerts that were invalid.",
})
)
// init registers the API metrics with Prometheus. The result of each
// registration is discarded, exactly as before.
func init() {
	for _, collector := range []prometheus.Collector{
		numReceivedAlerts,
		numInvalidAlerts,
	} {
		prometheus.Register(collector)
	}
}
2015-11-05 09:49:32 +00:00
// API provides registration of handlers for API routes.
2015-07-01 11:17:08 +00:00
type API struct {
2015-11-27 14:41:22 +00:00
alerts provider.Alerts
silences provider.Silences
config string
resolveTimeout time.Duration
uptime time.Time
groups func() AlertOverview
2015-07-01 11:17:08 +00:00
// context is an indirection for testing.
context func(r *http.Request) context.Context
mtx sync.RWMutex
2015-07-01 11:17:08 +00:00
}
2015-11-05 09:49:32 +00:00
// NewAPI returns a new API.
func NewAPI(alerts provider.Alerts, silences provider.Silences, gf func() AlertOverview) *API {
return &API{
2015-09-27 12:07:04 +00:00
context: route.Context,
alerts: alerts,
silences: silences,
groups: gf,
2015-11-02 19:04:37 +00:00
uptime: time.Now(),
2015-07-01 11:17:08 +00:00
}
}
2016-02-14 14:40:48 +00:00
// Register registers the API handlers under their correct routes
2015-11-05 09:49:32 +00:00
// in the given router.
func (api *API) Register(r *route.Router) {
2016-01-09 12:16:00 +00:00
ihf := prometheus.InstrumentHandlerFunc
2015-10-16 12:02:22 +00:00
// Register legacy forwarder for alert pushing.
2016-01-09 12:16:00 +00:00
r.Post("/alerts", ihf("legacy_add_alerts", api.legacyAddAlerts))
2015-10-16 12:02:22 +00:00
// Register actual API.
r = r.WithPrefix("/v1")
2016-01-09 12:16:00 +00:00
r.Get("/status", ihf("status", api.status))
r.Get("/alerts/groups", ihf("alert_groups", api.alertGroups))
2015-07-01 11:17:08 +00:00
2016-01-09 12:16:00 +00:00
r.Get("/alerts", ihf("list_alerts", api.listAlerts))
r.Post("/alerts", ihf("add_alerts", api.addAlerts))
2015-07-01 11:17:08 +00:00
2016-01-09 12:16:00 +00:00
r.Get("/silences", ihf("list_silences", api.listSilences))
r.Post("/silences", ihf("add_silence", api.addSilence))
r.Get("/silence/:sid", ihf("get_silence", api.getSilence))
r.Del("/silence/:sid", ihf("del_silence", api.delSilence))
}
2015-11-05 09:49:32 +00:00
// Update sets the configuration string to a new value.
2015-11-27 14:41:22 +00:00
func (api *API) Update(config string, resolveTimeout time.Duration) {
api.mtx.Lock()
defer api.mtx.Unlock()
2015-07-01 11:17:08 +00:00
api.config = config
2015-11-27 14:41:22 +00:00
api.resolveTimeout = resolveTimeout
2015-07-01 11:17:08 +00:00
}
// errorType classifies an API error. respondError derives the HTTP status
// code of an error response from it.
type errorType string

// Every constant carries the errorType type explicitly. Without the explicit
// type, a constant with its own initializer is an untyped string constant and
// only converts to errorType implicitly at each use site.
const (
	errorNone     errorType = ""
	errorInternal errorType = "server_error"
	errorBadData  errorType = "bad_data"
)
// apiError pairs an underlying error with the errorType that classifies it.
type apiError struct {
	typ errorType
	err error
}

// Error implements the error interface, rendering "<type>: <error>".
//
// The receiver is a value so that both apiError and *apiError satisfy error.
// apiError is passed around by value in this file, and with a pointer
// receiver such values neither implement error nor get formatted through
// this method. When no underlying error is set only the type is returned.
func (e apiError) Error() string {
	if e.err == nil {
		return string(e.typ)
	}
	return fmt.Sprintf("%s: %s", e.typ, e.err)
}
func (api *API) status(w http.ResponseWriter, req *http.Request) {
api.mtx.RLock()
var status = struct {
Config string `json:"config"`
VersionInfo map[string]string `json:"versionInfo"`
2015-11-02 19:04:37 +00:00
Uptime time.Time `json:"uptime"`
}{
Config: api.config,
VersionInfo: map[string]string{
"version": version.Version,
"revision": version.Revision,
"branch": version.Branch,
"buildUser": version.BuildUser,
"buildDate": version.BuildDate,
"goVersion": version.GoVersion,
},
Uptime: api.uptime,
}
api.mtx.RUnlock()
respond(w, status)
}
// alertGroups serves the overview returned by api.groups as a JSON success
// response.
func (api *API) alertGroups(w http.ResponseWriter, req *http.Request) {
	overview := api.groups()
	respond(w, overview)
}
2015-09-26 09:12:59 +00:00
func (api *API) listAlerts(w http.ResponseWriter, r *http.Request) {
2015-10-15 10:47:15 +00:00
alerts := api.alerts.GetPending()
defer alerts.Close()
var (
err error
res []*types.Alert
)
// TODO(fabxc): enforce a sensible timeout.
for a := range alerts.Next() {
if err = alerts.Err(); err != nil {
break
}
res = append(res, a)
}
if err != nil {
respondError(w, apiError{
2015-12-07 12:41:18 +00:00
typ: errorInternal,
2015-10-15 10:47:15 +00:00
err: err,
}, nil)
return
}
respond(w, types.Alerts(res...))
2015-09-26 09:12:59 +00:00
}
2015-09-25 16:14:46 +00:00
2015-10-16 12:02:22 +00:00
func (api *API) legacyAddAlerts(w http.ResponseWriter, r *http.Request) {
var legacyAlerts = []struct {
Summary model.LabelValue `json:"summary"`
Description model.LabelValue `json:"description"`
Runbook model.LabelValue `json:"runbook"`
Labels model.LabelSet `json:"labels"`
Payload model.LabelSet `json:"payload"`
}{}
if err := receive(r, &legacyAlerts); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var alerts []*types.Alert
for _, la := range legacyAlerts {
a := &types.Alert{
Alert: model.Alert{
Labels: la.Labels,
Annotations: la.Payload,
},
}
if a.Annotations == nil {
a.Annotations = model.LabelSet{}
}
2015-10-16 12:02:22 +00:00
a.Annotations["summary"] = la.Summary
a.Annotations["description"] = la.Description
a.Annotations["runbook"] = la.Runbook
alerts = append(alerts, a)
}
2015-12-09 17:21:06 +00:00
api.insertAlerts(w, r, alerts...)
}
2015-10-16 12:02:22 +00:00
2015-12-09 17:21:06 +00:00
func (api *API) addAlerts(w http.ResponseWriter, r *http.Request) {
var alerts []*types.Alert
if err := receive(r, &alerts); err != nil {
2015-10-16 12:02:22 +00:00
respondError(w, apiError{
2015-12-09 17:21:06 +00:00
typ: errorBadData,
2015-10-16 12:02:22 +00:00
err: err,
}, nil)
return
}
2015-12-09 17:21:06 +00:00
api.insertAlerts(w, r, alerts...)
2015-10-16 12:02:22 +00:00
}
2015-12-09 17:21:06 +00:00
func (api *API) insertAlerts(w http.ResponseWriter, r *http.Request, alerts ...*types.Alert) {
now := time.Now()
for _, alert := range alerts {
2015-09-29 15:26:44 +00:00
alert.UpdatedAt = now
// Ensure StartsAt is set.
2015-09-29 15:26:44 +00:00
if alert.StartsAt.IsZero() {
alert.StartsAt = now
2015-09-26 09:12:59 +00:00
}
// If no end time is defined, set a timeout after which an alert
// is marked resolved if it is not updated.
2015-09-29 15:26:44 +00:00
if alert.EndsAt.IsZero() {
alert.Timeout = true
2016-02-03 13:12:10 +00:00
alert.EndsAt = now.Add(api.resolveTimeout)
numReceivedAlerts.WithLabelValues("firing").Inc()
} else {
numReceivedAlerts.WithLabelValues("resolved").Inc()
2015-09-26 09:12:59 +00:00
}
}
2015-09-25 16:14:46 +00:00
// Make a best effort to insert all alerts that are valid.
var (
validAlerts = make([]*types.Alert, 0, len(alerts))
validationErrs = &types.MultiError{}
)
2015-12-09 17:21:06 +00:00
for _, a := range alerts {
if err := a.Validate(); err != nil {
validationErrs.Add(err)
numInvalidAlerts.Inc()
continue
2015-12-09 17:21:06 +00:00
}
validAlerts = append(validAlerts, a)
2015-12-09 17:21:06 +00:00
}
if err := api.alerts.Put(validAlerts...); err != nil {
2015-09-26 09:12:59 +00:00
respondError(w, apiError{
2015-12-07 12:41:18 +00:00
typ: errorInternal,
2015-09-26 09:12:59 +00:00
err: err,
}, nil)
return
}
if validationErrs.Len() > 0 {
respondError(w, apiError{
typ: errorBadData,
err: validationErrs,
}, nil)
return
}
2015-09-25 16:14:46 +00:00
2015-09-26 09:12:59 +00:00
respond(w, nil)
}
2015-09-25 16:14:46 +00:00
2015-09-27 12:07:04 +00:00
func (api *API) addSilence(w http.ResponseWriter, r *http.Request) {
var sil types.Silence
if err := receive(r, &sil); err != nil {
2015-12-16 15:39:37 +00:00
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
2015-09-27 12:07:04 +00:00
return
}
2015-10-06 10:10:15 +00:00
if sil.CreatedAt.IsZero() {
sil.CreatedAt = time.Now()
}
2015-12-09 17:21:06 +00:00
if err := sil.Validate(); err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
sid, err := api.silences.Set(&sil)
if err != nil {
2015-09-27 12:07:04 +00:00
respondError(w, apiError{
2015-12-07 12:41:18 +00:00
typ: errorInternal,
2015-09-27 12:07:04 +00:00
err: err,
}, nil)
return
}
respond(w, struct {
SilenceID uint64 `json:"silenceId"`
}{
SilenceID: sid,
})
2015-09-27 12:07:04 +00:00
}
func (api *API) getSilence(w http.ResponseWriter, r *http.Request) {
sids := route.Param(api.context(r), "sid")
sid, err := strconv.ParseUint(sids, 10, 64)
if err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
2015-12-09 17:21:06 +00:00
return
2015-09-27 12:07:04 +00:00
}
2015-10-01 15:50:15 +00:00
sil, err := api.silences.Get(sid)
2015-09-27 12:07:04 +00:00
if err != nil {
http.Error(w, fmt.Sprint("Error getting silence: ", err), http.StatusNotFound)
return
}
respond(w, &sil)
}
func (api *API) delSilence(w http.ResponseWriter, r *http.Request) {
sids := route.Param(api.context(r), "sid")
sid, err := strconv.ParseUint(sids, 10, 64)
if err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
2015-12-09 17:21:06 +00:00
return
2015-09-27 12:07:04 +00:00
}
2015-10-01 15:50:15 +00:00
if err := api.silences.Del(sid); err != nil {
2015-09-27 12:07:04 +00:00
respondError(w, apiError{
2015-12-07 12:41:18 +00:00
typ: errorInternal,
2015-09-27 12:07:04 +00:00
err: err,
}, nil)
return
}
respond(w, nil)
}
func (api *API) listSilences(w http.ResponseWriter, r *http.Request) {
sils, err := api.silences.All()
if err != nil {
respondError(w, apiError{
2015-12-07 12:41:18 +00:00
typ: errorInternal,
2015-09-27 12:07:04 +00:00
err: err,
}, nil)
return
}
respond(w, sils)
}
2015-07-01 11:17:08 +00:00
// status is the outcome marker carried in the "status" field of every API
// response.
type status string

// Both constants carry the status type explicitly. Without the explicit
// type, a constant with its own initializer is an untyped string constant.
const (
	statusSuccess status = "success"
	statusError   status = "error"
)
// response is the JSON envelope of every API reply. Apart from Status, each
// field is omitted from the encoded output when it is empty.
type response struct {
	// Status tells whether the request succeeded.
	Status status `json:"status"`
	// Data is the payload of the reply.
	Data interface{} `json:"data,omitempty"`
	// ErrorType classifies the failure in an error reply.
	ErrorType errorType `json:"errorType,omitempty"`
	// Error is the error message of an error reply.
	Error string `json:"error,omitempty"`
}
func respond(w http.ResponseWriter, data interface{}) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(200)
2015-07-01 11:17:08 +00:00
b, err := json.Marshal(&response{
Status: statusSuccess,
Data: data,
})
if err != nil {
return
}
w.Write(b)
}
func respondError(w http.ResponseWriter, apiErr apiError, data interface{}) {
w.Header().Set("Content-Type", "application/json")
2015-12-07 12:41:18 +00:00
switch apiErr.typ {
case errorBadData:
w.WriteHeader(http.StatusBadRequest)
case errorInternal:
w.WriteHeader(http.StatusInternalServerError)
default:
2016-02-15 10:18:23 +00:00
panic(fmt.Sprintf("unknown error type %q", apiErr))
2015-12-07 12:41:18 +00:00
}
2015-07-01 11:17:08 +00:00
b, err := json.Marshal(&response{
Status: statusError,
ErrorType: apiErr.typ,
Error: apiErr.err.Error(),
Data: data,
})
if err != nil {
return
}
2015-12-07 12:41:18 +00:00
log.Errorf("api error: %v", apiErr)
2015-07-01 11:17:08 +00:00
w.Write(b)
}
func receive(r *http.Request, v interface{}) error {
dec := json.NewDecoder(r.Body)
defer r.Body.Close()
err := dec.Decode(v)
if err != nil {
log.Debugf("Decoding request failed: %v", err)
}
return err
2015-07-01 11:17:08 +00:00
}