alertmanager/api.go
Fabian Reinartz 81cbf3cda7 *: refactor Silence type, use UUID
This commit removes the dependency on model.Silence for the internal
Silence type, uses UUIDs instead of uint64s and clarifies invariants
around timestamp handling.

The created_at timestamp is removed for the time being.
2016-08-09 11:59:35 +02:00

448 lines
10 KiB
Go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"fmt"
"net/http"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"github.com/prometheus/common/version"
"github.com/satori/go.uuid"
"golang.org/x/net/context"
"github.com/prometheus/alertmanager/provider"
"github.com/prometheus/alertmanager/types"
)
var (
numReceivedAlerts = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "alerts_received_total",
Help: "The total number of received alerts.",
}, []string{"status"})
numInvalidAlerts = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "alerts_invalid_total",
Help: "The total number of received alerts that were invalid.",
})
)
func init() {
prometheus.Register(numReceivedAlerts)
prometheus.Register(numInvalidAlerts)
}
// API provides registration of handlers for API routes.
type API struct {
alerts provider.Alerts
silences provider.Silences
config string
resolveTimeout time.Duration
uptime time.Time
groups func() AlertOverview
// context is an indirection for testing.
context func(r *http.Request) context.Context
mtx sync.RWMutex
}
// NewAPI returns a new API.
func NewAPI(alerts provider.Alerts, silences provider.Silences, gf func() AlertOverview) *API {
return &API{
context: route.Context,
alerts: alerts,
silences: silences,
groups: gf,
uptime: time.Now(),
}
}
// Register registers the API handlers under their correct routes
// in the given router.
func (api *API) Register(r *route.Router) {
ihf := prometheus.InstrumentHandlerFunc
// Register legacy forwarder for alert pushing.
r.Post("/alerts", ihf("legacy_add_alerts", api.legacyAddAlerts))
// Register actual API.
r = r.WithPrefix("/v1")
r.Get("/status", ihf("status", api.status))
r.Get("/alerts/groups", ihf("alert_groups", api.alertGroups))
r.Get("/alerts", ihf("list_alerts", api.listAlerts))
r.Post("/alerts", ihf("add_alerts", api.addAlerts))
r.Get("/silences", ihf("list_silences", api.listSilences))
r.Post("/silences", ihf("add_silence", api.addSilence))
r.Get("/silence/:sid", ihf("get_silence", api.getSilence))
r.Del("/silence/:sid", ihf("del_silence", api.delSilence))
}
// Update sets the configuration string to a new value.
func (api *API) Update(config string, resolveTimeout time.Duration) {
api.mtx.Lock()
defer api.mtx.Unlock()
api.config = config
api.resolveTimeout = resolveTimeout
}
type errorType string
const (
errorNone errorType = ""
errorInternal = "server_error"
errorBadData = "bad_data"
)
type apiError struct {
typ errorType
err error
}
func (e *apiError) Error() string {
return fmt.Sprintf("%s: %s", e.typ, e.err)
}
func (api *API) status(w http.ResponseWriter, req *http.Request) {
api.mtx.RLock()
var status = struct {
Config string `json:"config"`
VersionInfo map[string]string `json:"versionInfo"`
Uptime time.Time `json:"uptime"`
}{
Config: api.config,
VersionInfo: map[string]string{
"version": version.Version,
"revision": version.Revision,
"branch": version.Branch,
"buildUser": version.BuildUser,
"buildDate": version.BuildDate,
"goVersion": version.GoVersion,
},
Uptime: api.uptime,
}
api.mtx.RUnlock()
respond(w, status)
}
func (api *API) alertGroups(w http.ResponseWriter, req *http.Request) {
respond(w, api.groups())
}
func (api *API) listAlerts(w http.ResponseWriter, r *http.Request) {
alerts := api.alerts.GetPending()
defer alerts.Close()
var (
err error
res []*types.Alert
)
// TODO(fabxc): enforce a sensible timeout.
for a := range alerts.Next() {
if err = alerts.Err(); err != nil {
break
}
res = append(res, a)
}
if err != nil {
respondError(w, apiError{
typ: errorInternal,
err: err,
}, nil)
return
}
respond(w, types.Alerts(res...))
}
func (api *API) legacyAddAlerts(w http.ResponseWriter, r *http.Request) {
var legacyAlerts = []struct {
Summary model.LabelValue `json:"summary"`
Description model.LabelValue `json:"description"`
Runbook model.LabelValue `json:"runbook"`
Labels model.LabelSet `json:"labels"`
Payload model.LabelSet `json:"payload"`
}{}
if err := receive(r, &legacyAlerts); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
var alerts []*types.Alert
for _, la := range legacyAlerts {
a := &types.Alert{
Alert: model.Alert{
Labels: la.Labels,
Annotations: la.Payload,
},
}
if a.Annotations == nil {
a.Annotations = model.LabelSet{}
}
a.Annotations["summary"] = la.Summary
a.Annotations["description"] = la.Description
a.Annotations["runbook"] = la.Runbook
alerts = append(alerts, a)
}
api.insertAlerts(w, r, alerts...)
}
func (api *API) addAlerts(w http.ResponseWriter, r *http.Request) {
var alerts []*types.Alert
if err := receive(r, &alerts); err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
api.insertAlerts(w, r, alerts...)
}
func (api *API) insertAlerts(w http.ResponseWriter, r *http.Request, alerts ...*types.Alert) {
now := time.Now()
for _, alert := range alerts {
alert.UpdatedAt = now
// Ensure StartsAt is set.
if alert.StartsAt.IsZero() {
alert.StartsAt = now
}
// If no end time is defined, set a timeout after which an alert
// is marked resolved if it is not updated.
if alert.EndsAt.IsZero() {
alert.Timeout = true
alert.EndsAt = now.Add(api.resolveTimeout)
numReceivedAlerts.WithLabelValues("firing").Inc()
} else {
numReceivedAlerts.WithLabelValues("resolved").Inc()
}
}
// Make a best effort to insert all alerts that are valid.
var (
validAlerts = make([]*types.Alert, 0, len(alerts))
validationErrs = &types.MultiError{}
)
for _, a := range alerts {
if err := a.Validate(); err != nil {
validationErrs.Add(err)
numInvalidAlerts.Inc()
continue
}
validAlerts = append(validAlerts, a)
}
if err := api.alerts.Put(validAlerts...); err != nil {
respondError(w, apiError{
typ: errorInternal,
err: err,
}, nil)
return
}
if validationErrs.Len() > 0 {
respondError(w, apiError{
typ: errorBadData,
err: validationErrs,
}, nil)
return
}
respond(w, nil)
}
func (api *API) addSilence(w http.ResponseWriter, r *http.Request) {
var msil model.Silence
if err := receive(r, &msil); err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
sil := types.NewSilence(&msil)
// if sil.CreatedAt.IsZero() {
// sil.CreatedAt = time.Now()
// }
if err := sil.Validate(); err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
if err := sil.Init(); err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
sid, err := api.silences.Set(sil)
if err != nil {
respondError(w, apiError{
typ: errorInternal,
err: err,
}, nil)
return
}
respond(w, struct {
SilenceID uuid.UUID `json:"silenceId"`
}{
SilenceID: sid,
})
}
func (api *API) getSilence(w http.ResponseWriter, r *http.Request) {
sids := route.Param(api.context(r), "sid")
sid, err := uuid.FromString(sids)
if err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
sil, err := api.silences.Get(sid)
if err != nil {
http.Error(w, fmt.Sprint("Error getting silence: ", err), http.StatusNotFound)
return
}
respond(w, &sil)
}
func (api *API) delSilence(w http.ResponseWriter, r *http.Request) {
sids := route.Param(api.context(r), "sid")
sid, err := uuid.FromString(sids)
if err != nil {
respondError(w, apiError{
typ: errorBadData,
err: err,
}, nil)
return
}
if err := api.silences.Del(sid); err != nil {
respondError(w, apiError{
typ: errorInternal,
err: err,
}, nil)
return
}
respond(w, nil)
}
func (api *API) listSilences(w http.ResponseWriter, r *http.Request) {
sils, err := api.silences.All()
if err != nil {
respondError(w, apiError{
typ: errorInternal,
err: err,
}, nil)
return
}
respond(w, sils)
}
type status string
const (
statusSuccess status = "success"
statusError = "error"
)
type response struct {
Status status `json:"status"`
Data interface{} `json:"data,omitempty"`
ErrorType errorType `json:"errorType,omitempty"`
Error string `json:"error,omitempty"`
}
func respond(w http.ResponseWriter, data interface{}) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(200)
b, err := json.Marshal(&response{
Status: statusSuccess,
Data: data,
})
if err != nil {
return
}
w.Write(b)
}
func respondError(w http.ResponseWriter, apiErr apiError, data interface{}) {
w.Header().Set("Content-Type", "application/json")
switch apiErr.typ {
case errorBadData:
w.WriteHeader(http.StatusBadRequest)
case errorInternal:
w.WriteHeader(http.StatusInternalServerError)
default:
panic(fmt.Sprintf("unknown error type %q", apiErr))
}
b, err := json.Marshal(&response{
Status: statusError,
ErrorType: apiErr.typ,
Error: apiErr.err.Error(),
Data: data,
})
if err != nil {
return
}
log.Errorf("api error: %v", apiErr)
w.Write(b)
}
func receive(r *http.Request, v interface{}) error {
dec := json.NewDecoder(r.Body)
defer r.Body.Close()
err := dec.Decode(v)
if err != nil {
log.Debugf("Decoding request failed: %v", err)
}
return err
}