alertmanager/main.go
2016-08-09 12:00:28 +02:00

371 lines
9.3 KiB
Go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"io/ioutil"
stdlog "log"
"net"
"net/http"
"net/url"
"os"
"os/signal"
"path"
"sort"
"strconv"
"strings"
"syscall"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"github.com/prometheus/common/route"
"github.com/prometheus/common/version"
"github.com/weaveworks/mesh"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/provider/boltmem"
meshprov "github.com/prometheus/alertmanager/provider/mesh"
"github.com/prometheus/alertmanager/template"
"github.com/prometheus/alertmanager/types"
)
// Metrics describing the outcome of configuration (re)loads. Both gauges are
// registered in init and updated by the reload function defined in main.
var (
// configSuccess is set to 1 after a configuration load succeeds and to 0
// after one fails.
configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "alertmanager",
Name: "config_last_reload_successful",
Help: "Whether the last configuration reload attempt was successful.",
})
// configSuccessTime holds the Unix timestamp (in seconds) of the most
// recent successful configuration load.
configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "alertmanager",
Name: "config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
)
// init registers the configuration reload gauges and the build version
// collector with the default Prometheus registry.
func init() {
	collectors := []prometheus.Collector{
		configSuccess,
		configSuccessTime,
		version.NewCollector("alertmanager"),
	}
	for _, c := range collectors {
		prometheus.MustRegister(c)
	}
}
// main wires up and runs Alertmanager. It parses the command-line flags,
// starts the mesh router and connects to the configured peers, opens the alert
// store, loads the configuration, serves the web UI and API, and then blocks
// until SIGINT or SIGTERM is received. A configuration reload is triggered by
// SIGHUP or by a message on the web reload channel.
func main() {
	peers := &stringset{}

	var (
		showVersion = flag.Bool("version", false, "Print version information.")
		configFile  = flag.String("config.file", "alertmanager.yml", "Alertmanager configuration file name.")
		dataDir     = flag.String("storage.path", "data/", "Base path for data storage.")

		externalURL   = flag.String("web.external-url", "", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.")
		listenAddress = flag.String("web.listen-address", ":9093", "Address to listen on for the web interface and API.")

		meshListen = flag.String("mesh.listen-address", net.JoinHostPort("0.0.0.0", strconv.Itoa(mesh.Port)), "mesh listen address")
		hwaddr     = flag.String("mesh.hardware-address", mustHardwareAddr(), "MAC address, i.e. mesh peer ID")
		nickname   = flag.String("mesh.nickname", mustHostname(), "peer nickname")
	)
	flag.Var(peers, "mesh.peer", "initial peers (may be repeated)")
	flag.Parse()

	if *showVersion {
		fmt.Fprintln(os.Stdout, version.Print("alertmanager"))
		os.Exit(0)
	}

	log.Infoln("Starting alertmanager", version.Info())
	log.Infoln("Build context", version.BuildContext())

	err := os.MkdirAll(*dataDir, 0777)
	if err != nil {
		log.Fatal(err)
	}

	mrouter := initMesh(*meshListen, *hwaddr, *nickname)

	// Notification info and silences are both registered as gossip channels
	// on the mesh router before it is started.
	ni := meshprov.NewNotificationInfos(log.Base())
	ni.Register(mrouter.NewGossip("notify_info", ni))

	marker := types.NewMarker()

	silences := meshprov.NewSilences(marker, log.Base())
	silences.Register(mrouter.NewGossip("silences", silences))

	mrouter.Start()
	defer mrouter.Stop()

	mrouter.ConnectionMaker.InitiateConnections(peers.slice(), true)

	alerts, err := boltmem.NewAlerts(*dataDir)
	if err != nil {
		log.Fatal(err)
	}
	defer alerts.Close()

	// These are (re)assigned by reload below.
	// NOTE(review): reload runs on the reload goroutine while the API callback
	// reads disp; no synchronization is visible here — confirm whether a lock
	// is needed.
	var (
		inhibitor *Inhibitor
		tmpl      *template.Template
		disp      *Dispatcher
	)
	// The receiver of a deferred method call is evaluated when the defer
	// statement executes. disp is still nil at this point, so a plain
	// `defer disp.Stop()` would never stop the dispatcher that reload creates
	// later. The closure defers the lookup of disp until main returns.
	defer func() { disp.Stop() }()

	api := NewAPI(alerts, silences, func() AlertOverview {
		return disp.Groups()
	})

	// build assembles the notification pipeline for the given receivers using
	// the currently loaded templates.
	build := func(rcvs []*config.Receiver) notify.Notifier {
		var (
			router  = notify.Router{}
			fanouts = notify.Build(rcvs, tmpl)
		)
		for name, fo := range fanouts {
			for i, n := range fo {
				// Wrap every receiver notifier in retry, dedup and mesh-wait
				// layers, with a logging layer after each step.
				n = notify.Retry(n)
				n = notify.Log(n, log.With("step", "retry"))
				n = notify.Dedup(ni, n)
				n = notify.Log(n, log.With("step", "dedup"))
				n = notify.Wait(meshWait(mrouter, 5*time.Second), n)
				n = notify.Log(n, log.With("step", "wait"))
				fo[i] = n
			}
			router[name] = fo
		}
		// The routed notifier is in turn wrapped in silence and inhibit layers.
		n := notify.Notifier(router)
		n = notify.Log(n, log.With("step", "route"))
		n = notify.Silence(silences, n, marker)
		n = notify.Log(n, log.With("step", "silence"))
		n = notify.Inhibit(inhibitor, n, marker)
		n = notify.Log(n, log.With("step", "inhibit"))
		return n
	}

	amURL, err := extURL(*listenAddress, *externalURL)
	if err != nil {
		log.Fatal(err)
	}

	// reload loads the configuration file and replaces the running inhibitor
	// and dispatcher with ones built from it. The outcome is logged and
	// recorded in the reload metrics by the deferred function.
	reload := func() (err error) {
		log.With("file", *configFile).Infof("Loading configuration file")
		defer func() {
			if err != nil {
				log.With("file", *configFile).Errorf("Loading configuration file failed: %s", err)
				configSuccess.Set(0)
			} else {
				configSuccess.Set(1)
				configSuccessTime.Set(float64(time.Now().Unix()))
			}
		}()

		// err is the named result here (same scope), so := only declares conf.
		conf, err := config.LoadFile(*configFile)
		if err != nil {
			return err
		}

		api.Update(conf.String(), time.Duration(conf.Global.ResolveTimeout))

		tmpl, err = template.FromGlobs(conf.Templates...)
		if err != nil {
			return err
		}
		tmpl.ExternalURL = amURL

		// Stop the previous instances (both nil on the first load) before
		// starting their replacements.
		inhibitor.Stop()
		disp.Stop()

		inhibitor = NewInhibitor(alerts, conf.InhibitRules, marker)
		disp = NewDispatcher(alerts, NewRoute(conf.Route, nil), build(conf.Receivers), marker)

		go disp.Run()
		go inhibitor.Run()

		return nil
	}

	// The failure has already been logged by reload itself.
	if err := reload(); err != nil {
		os.Exit(1)
	}

	router := route.New()

	webReload := make(chan struct{})
	RegisterWeb(router.WithPrefix(amURL.Path), webReload)
	api.Register(router.WithPrefix(path.Join(amURL.Path, "/api")))

	log.Infoln("Listening on", *listenAddress)
	go listen(*listenAddress, router)

	var (
		// signal.Notify never blocks when delivering a signal, so these
		// channels need a buffer; with an unbuffered channel a signal that
		// arrives while nobody is receiving (for example a SIGHUP during a
		// running reload) would be dropped.
		hup      = make(chan os.Signal, 1)
		hupReady = make(chan bool)
		term     = make(chan os.Signal, 1)
	)
	signal.Notify(hup, syscall.SIGHUP)
	signal.Notify(term, os.Interrupt, syscall.SIGTERM)

	go func() {
		<-hupReady
		for {
			select {
			case <-hup:
			case <-webReload:
			}
			// A failed reload is logged and recorded in the metrics by reload
			// itself, so the returned error is intentionally discarded here.
			_ = reload()
		}
	}()

	// Wait for reload or termination signals.
	close(hupReady) // Unblock SIGHUP handler.

	<-term

	log.Infoln("Received SIGTERM, exiting gracefully...")
}
// peerDescSlice is a list of mesh peer descriptions that implements
// sort.Interface, ordering the peers by ascending UID.
type peerDescSlice []mesh.PeerDescription

// Len reports the number of peer descriptions in the list.
func (s peerDescSlice) Len() int {
	return len(s)
}

// Less reports whether the peer at index i has a smaller UID than the peer at
// index j.
func (s peerDescSlice) Less(i, j int) bool {
	return s[j].UID > s[i].UID
}

// Swap exchanges the peer descriptions at indexes i and j.
func (s peerDescSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// meshWait returns a function that inspects the current peer state and returns
// one base timeout for every peer that sorts before ourselves when all known
// peers are ordered by ascending UID. If no peer is marked as self, every peer
// is counted.
func meshWait(r *mesh.Router, timeout time.Duration) func() time.Duration {
	return func() time.Duration {
		var ordered peerDescSlice
		for _, d := range r.Peers.Descriptions() {
			ordered = append(ordered, d)
		}
		sort.Sort(ordered)

		// Our index in the ordered list equals the number of peers ahead of us.
		pos := 0
		for pos < len(ordered) && !ordered[pos].Self {
			pos++
		}

		log.Warnf("timeout multiplier: %d", pos)
		return time.Duration(pos) * timeout
	}
}
// initMesh creates the mesh router for this instance.
//
// addr is the "host:port" address the router listens on, hwaddr is parsed into
// the mesh peer name and nickname is passed through as the peer nickname. Any
// malformed argument terminates the process via log.Fatalf. The router's own
// log output is discarded.
func initMesh(addr, hwaddr, nickname string) *mesh.Router {
	host, rawPort, err := net.SplitHostPort(addr)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	port, err := strconv.Atoi(rawPort)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	peerName, err := mesh.PeerNameFromString(hwaddr)
	if err != nil {
		log.Fatalf("%s: %v", hwaddr, err)
	}

	cfg := mesh.Config{
		Host:               host,
		Port:               port,
		ProtocolMinVersion: mesh.ProtocolMinVersion,
		Password:           []byte(""),
		ConnLimit:          64,
		PeerDiscovery:      true,
		TrustedSubnets:     []*net.IPNet{},
	}
	discard := stdlog.New(ioutil.Discard, "", 0)

	return mesh.NewRouter(cfg, peerName, nickname, mesh.NullOverlay{}, discard)
}
// extURL computes the URL under which Alertmanager is externally reachable.
//
// If external is empty, the URL is derived as http://<hostname>:<port>/ from
// the local hostname and the port part of the listen address. The path of the
// resulting URL is normalized so it can be used as a route prefix: trailing
// slashes are removed and a non-empty path always begins with "/".
//
// An error is returned when the hostname cannot be determined or listen is not
// a valid host:port pair (both only relevant when external is empty), or when
// the URL fails to parse.
func extURL(listen, external string) (*url.URL, error) {
	raw := external
	if raw == "" {
		host, err := os.Hostname()
		if err != nil {
			return nil, err
		}
		_, port, err := net.SplitHostPort(listen)
		if err != nil {
			return nil, err
		}
		raw = fmt.Sprintf("http://%s:%s/", host, port)
	}

	parsed, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	// Normalize the path prefix: no trailing slashes, leading slash if set.
	prefix := strings.TrimRight(parsed.Path, "/")
	if len(prefix) > 0 && prefix[0] != '/' {
		prefix = "/" + prefix
	}
	parsed.Path = prefix

	return parsed, nil
}
// listen serves router over HTTP on the given address. It blocks for as long
// as the server is running and terminates the process via log.Fatal if the
// server returns an error.
func listen(listen string, router *route.Router) {
	err := http.ListenAndServe(listen, router)
	if err != nil {
		log.Fatal(err)
	}
}
// stringset is a set of strings. Through Set and String it can be used as a
// repeatable command-line flag value.
type stringset map[string]struct{}

// Set adds value to the set. It always returns nil.
func (ss stringset) Set(value string) error {
	ss[value] = struct{}{}
	return nil
}

// String returns the members of the set in sorted order, joined by commas.
func (ss stringset) String() string {
	members := ss.slice()
	return strings.Join(members, ",")
}

// slice returns the members of the set as a sorted slice.
func (ss stringset) slice() []string {
	members := make([]string, len(ss))
	i := 0
	for member := range ss {
		members[i] = member
		i++
	}
	sort.Strings(members)
	return members
}
// mustHardwareAddr returns the hardware address, in string form, of the first
// network interface that has one. It panics if the interfaces cannot be listed
// or if none of them has a hardware address.
func mustHardwareAddr() string {
	interfaces, err := net.Interfaces()
	if err != nil {
		panic(err)
	}
	for i := range interfaces {
		addr := interfaces[i].HardwareAddr.String()
		if addr == "" {
			continue
		}
		return addr
	}
	panic("no valid network interfaces")
}
// mustHostname returns the host name reported by the operating system and
// panics if it cannot be determined.
func mustHostname() string {
	name, err := os.Hostname()
	if err == nil {
		return name
	}
	panic(err)
}