alertmanager/main.go

371 lines
9.3 KiB
Go
Raw Normal View History

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
2015-07-01 11:17:08 +00:00
import (
2015-07-01 15:56:53 +00:00
"flag"
2015-11-11 15:50:54 +00:00
"fmt"
2016-06-02 13:43:04 +00:00
"io/ioutil"
stdlog "log"
2015-11-26 17:19:46 +00:00
"net"
2015-07-01 11:17:08 +00:00
"net/http"
2015-11-26 17:19:46 +00:00
"net/url"
2015-09-29 09:42:29 +00:00
"os"
"os/signal"
"path"
2016-06-02 13:43:04 +00:00
"sort"
"strconv"
2015-11-11 15:50:54 +00:00
"strings"
2015-09-29 09:42:29 +00:00
"syscall"
2015-11-27 14:41:22 +00:00
"time"
2015-07-01 11:17:08 +00:00
2016-03-03 13:20:21 +00:00
"github.com/prometheus/client_golang/prometheus"
2015-09-28 10:12:27 +00:00
"github.com/prometheus/common/log"
2015-07-01 11:17:08 +00:00
"github.com/prometheus/common/route"
"github.com/prometheus/common/version"
2016-06-02 13:43:04 +00:00
"github.com/weaveworks/mesh"
2015-07-01 15:56:53 +00:00
2015-09-25 16:14:46 +00:00
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/notify"
2016-05-02 09:08:22 +00:00
"github.com/prometheus/alertmanager/provider/boltmem"
2016-06-02 13:43:04 +00:00
meshprov "github.com/prometheus/alertmanager/provider/mesh"
2015-10-11 11:32:24 +00:00
"github.com/prometheus/alertmanager/template"
"github.com/prometheus/alertmanager/types"
2015-07-01 15:56:53 +00:00
)
2016-03-03 13:20:21 +00:00
// configSuccess is a gauge that reload sets to 1 after a successful
// configuration load and to 0 after a failed one.
var configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
	Namespace: "alertmanager",
	Name:      "config_last_reload_successful",
	Help:      "Whether the last configuration reload attempt was successful.",
})

// configSuccessTime is a gauge holding the Unix timestamp (in seconds) at
// which the configuration was last loaded successfully.
var configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
	Namespace: "alertmanager",
	Name:      "config_last_reload_success_timestamp_seconds",
	Help:      "Timestamp of the last successful configuration reload.",
})
func init() {
prometheus.MustRegister(configSuccess)
prometheus.MustRegister(configSuccessTime)
prometheus.MustRegister(version.NewCollector("alertmanager"))
2016-03-03 13:20:21 +00:00
}
func main() {
2016-06-02 13:43:04 +00:00
peers := &stringset{}
var (
showVersion = flag.Bool("version", false, "Print version information.")
configFile = flag.String("config.file", "alertmanager.yml", "Alertmanager configuration file name.")
dataDir = flag.String("storage.path", "data/", "Base path for data storage.")
externalURL = flag.String("web.external-url", "", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.")
listenAddress = flag.String("web.listen-address", ":9093", "Address to listen on for the web interface and API.")
meshListen = flag.String("mesh.listen-address", net.JoinHostPort("0.0.0.0", strconv.Itoa(mesh.Port)), "mesh listen address")
hwaddr = flag.String("mesh.hardware-address", mustHardwareAddr(), "MAC address, i.e. mesh peer ID")
nickname = flag.String("mesh.nickname", mustHostname(), "peer nickname")
)
flag.Var(peers, "mesh.peer", "initial peers (may be repeated)")
2015-09-29 09:50:59 +00:00
flag.Parse()
2015-11-11 15:50:54 +00:00
if *showVersion {
fmt.Fprintln(os.Stdout, version.Print("alertmanager"))
2015-11-11 15:50:54 +00:00
os.Exit(0)
}
log.Infoln("Starting alertmanager", version.Info())
log.Infoln("Build context", version.BuildContext())
2015-10-29 13:30:58 +00:00
err := os.MkdirAll(*dataDir, 0777)
if err != nil {
log.Fatal(err)
}
2015-10-06 10:36:33 +00:00
2016-06-02 13:43:04 +00:00
mrouter := initMesh(*meshListen, *hwaddr, *nickname)
ni := meshprov.NewNotificationInfos(log.Base())
ni.Register(mrouter.NewGossip("notify_info", ni))
marker := types.NewMarker()
2016-06-02 13:43:04 +00:00
silences := meshprov.NewSilences(marker, log.Base())
silences.Register(mrouter.NewGossip("silences", silences))
2016-05-02 09:08:22 +00:00
2016-06-02 13:43:04 +00:00
mrouter.Start()
defer mrouter.Stop()
mrouter.ConnectionMaker.InitiateConnections(peers.slice(), true)
2016-05-02 09:08:22 +00:00
2016-06-02 13:43:04 +00:00
alerts, err := boltmem.NewAlerts(*dataDir)
if err != nil {
log.Fatal(err)
}
2016-06-02 13:43:04 +00:00
defer alerts.Close()
2015-09-27 17:50:41 +00:00
var (
inhibitor *Inhibitor
tmpl *template.Template
disp *Dispatcher
)
defer disp.Stop()
api := NewAPI(alerts, silences, func() AlertOverview {
return disp.Groups()
})
2015-11-10 13:08:20 +00:00
build := func(rcvs []*config.Receiver) notify.Notifier {
var (
router = notify.Router{}
2015-11-10 13:08:20 +00:00
fanouts = notify.Build(rcvs, tmpl)
)
for name, fo := range fanouts {
for i, n := range fo {
n = notify.Retry(n)
n = notify.Log(n, log.With("step", "retry"))
2016-06-02 13:43:04 +00:00
n = notify.Dedup(ni, n)
n = notify.Log(n, log.With("step", "dedup"))
n = notify.Wait(meshWait(mrouter, 5*time.Second), n)
n = notify.Log(n, log.With("step", "wait"))
fo[i] = n
}
router[name] = fo
}
n := notify.Notifier(router)
n = notify.Log(n, log.With("step", "route"))
2015-12-03 16:27:36 +00:00
n = notify.Silence(silences, n, marker)
n = notify.Log(n, log.With("step", "silence"))
2015-12-03 16:27:36 +00:00
n = notify.Inhibit(inhibitor, n, marker)
n = notify.Log(n, log.With("step", "inhibit"))
return n
2015-09-27 11:18:13 +00:00
}
amURL, err := extURL(*listenAddress, *externalURL)
if err != nil {
log.Fatal(err)
}
reload := func() (err error) {
log.With("file", *configFile).Infof("Loading configuration file")
defer func() {
if err != nil {
2015-10-17 08:02:52 +00:00
log.With("file", *configFile).Errorf("Loading configuration file failed: %s", err)
2016-03-03 13:20:21 +00:00
configSuccess.Set(0)
} else {
configSuccess.Set(1)
configSuccessTime.Set(float64(time.Now().Unix()))
}
}()
conf, err := config.LoadFile(*configFile)
if err != nil {
return err
}
2015-11-27 14:41:22 +00:00
api.Update(conf.String(), time.Duration(conf.Global.ResolveTimeout))
tmpl, err = template.FromGlobs(conf.Templates...)
if err != nil {
return err
}
tmpl.ExternalURL = amURL
inhibitor.Stop()
disp.Stop()
inhibitor = NewInhibitor(alerts, conf.InhibitRules, marker)
2015-11-10 13:08:20 +00:00
disp = NewDispatcher(alerts, NewRoute(conf.Route, nil), build(conf.Receivers), marker)
go disp.Run()
go inhibitor.Run()
return nil
}
if err := reload(); err != nil {
os.Exit(1)
2015-09-29 10:22:13 +00:00
}
2015-07-01 11:17:08 +00:00
router := route.New()
2015-10-12 05:10:25 +00:00
webReload := make(chan struct{})
RegisterWeb(router.WithPrefix(amURL.Path), webReload)
api.Register(router.WithPrefix(path.Join(amURL.Path, "/api")))
log.Infoln("Listening on", *listenAddress)
go listen(*listenAddress, router)
2015-09-29 09:42:29 +00:00
2015-09-29 10:22:13 +00:00
var (
2016-06-06 13:42:16 +00:00
hup = make(chan os.Signal)
hupReady = make(chan bool)
term = make(chan os.Signal)
2015-09-29 10:22:13 +00:00
)
signal.Notify(hup, syscall.SIGHUP)
2015-09-29 09:42:29 +00:00
signal.Notify(term, os.Interrupt, syscall.SIGTERM)
2015-09-29 10:22:13 +00:00
go func() {
2016-06-06 13:42:16 +00:00
<-hupReady
for {
select {
case <-hup:
case <-webReload:
2016-06-06 13:42:16 +00:00
}
reload()
2015-09-29 10:22:13 +00:00
}
}()
2016-06-06 13:42:16 +00:00
// Wait for reload or termination signals.
close(hupReady) // Unblock SIGHUP handler.
2015-09-29 09:42:29 +00:00
<-term
2015-09-29 09:58:30 +00:00
log.Infoln("Received SIGTERM, exiting gracefully...")
2015-09-29 10:22:13 +00:00
}
2015-11-11 15:50:54 +00:00
// peerDescSlice is a list of mesh peer descriptions that can be passed to
// sort.Sort; elements are ordered by ascending UID.
type peerDescSlice []mesh.PeerDescription

// Len reports the number of peer descriptions.
func (s peerDescSlice) Len() int {
	return len(s)
}

// Less reports whether the peer at index i has a smaller UID than the peer at
// index j.
func (s peerDescSlice) Less(i, j int) bool {
	a, b := s[i], s[j]
	return a.UID < b.UID
}

// Swap exchanges the peers at indexes i and j.
func (s peerDescSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// meshWait returns a function that inspects the current peer state and returns
// a duration of one base timeout for each peer with a higher ID than ourselves.
func meshWait(r *mesh.Router, timeout time.Duration) func() time.Duration {
return func() time.Duration {
var peers peerDescSlice
for _, desc := range r.Peers.Descriptions() {
peers = append(peers, desc)
}
sort.Sort(peers)
k := 0
for _, desc := range peers {
2016-06-06 09:16:26 +00:00
if desc.Self {
break
}
k++
}
log.Warnf("timeout multiplier: %d", k)
return time.Duration(k) * timeout
}
}
2016-06-02 13:43:04 +00:00
// initMesh creates a mesh router listening on addr (in "host:port" form),
// using hwaddr as the peer name and nickname as the peer nickname. The process
// is terminated via log.Fatalf if addr cannot be split into host and numeric
// port, or if hwaddr is not accepted by mesh.PeerNameFromString. The router is
// handed a standard-library logger that writes to ioutil.Discard.
func initMesh(addr, hwaddr, nickname string) *mesh.Router {
	host, rawPort, err := net.SplitHostPort(addr)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	port, err := strconv.Atoi(rawPort)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	peerName, err := mesh.PeerNameFromString(hwaddr)
	if err != nil {
		log.Fatalf("%s: %v", hwaddr, err)
	}

	cfg := mesh.Config{
		Host:               host,
		Port:               port,
		ProtocolMinVersion: mesh.ProtocolMinVersion,
		Password:           []byte(""),
		ConnLimit:          64,
		PeerDiscovery:      true,
		TrustedSubnets:     []*net.IPNet{},
	}
	discard := stdlog.New(ioutil.Discard, "", 0)

	return mesh.NewRouter(cfg, peerName, nickname, mesh.NullOverlay{}, discard)
}
// extURL computes the URL under which this instance is reachable.
//
// When external is empty, the URL is built as "http://<hostname>:<port>/"
// from os.Hostname and the port portion of listen. Otherwise external is
// parsed as given. In both cases trailing slashes are stripped from the path
// and a non-empty path is made to start with a single leading "/".
//
// An error is returned if the hostname cannot be determined, listen cannot be
// split into host and port, or the URL does not parse.
func extURL(listen, external string) (*url.URL, error) {
	raw := external
	if raw == "" {
		hostname, err := os.Hostname()
		if err != nil {
			return nil, err
		}
		_, port, err := net.SplitHostPort(listen)
		if err != nil {
			return nil, err
		}
		raw = fmt.Sprintf("http://%s:%s/", hostname, port)
	}

	parsed, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}

	prefix := strings.TrimRight(parsed.Path, "/")
	switch {
	case prefix == "":
		// No path prefix; leave it empty.
	case strings.HasPrefix(prefix, "/"):
		// Already rooted.
	default:
		prefix = "/" + prefix
	}
	parsed.Path = prefix

	return parsed, nil
}
2016-01-28 12:44:40 +00:00
func listen(listen string, router *route.Router) {
if err := http.ListenAndServe(listen, router); err != nil {
2016-01-28 12:44:40 +00:00
log.Fatal(err)
}
}
2016-06-02 13:43:04 +00:00
// stringset is a set of strings. It provides the Set and String methods needed
// to use it as a repeatable command-line flag value.
type stringset map[string]struct{}

// Set adds value to the set. It always returns nil.
func (ss stringset) Set(value string) error {
	var present struct{}
	ss[value] = present
	return nil
}

// String returns the members of the set in sorted order, joined by commas.
func (ss stringset) String() string {
	members := ss.slice()
	return strings.Join(members, ",")
}

// slice returns the members of the set as a sorted, non-nil slice.
func (ss stringset) slice() []string {
	members := make([]string, 0, len(ss))
	for member := range ss {
		members = append(members, member)
	}
	sort.Sort(sort.StringSlice(members))
	return members
}
// mustHardwareAddr returns the hardware address, formatted as a string, of the
// first network interface reported by net.Interfaces whose address is
// non-empty. It panics if the interfaces cannot be listed or if none of them
// has a hardware address.
func mustHardwareAddr() string {
	ifaces, err := net.Interfaces()
	if err != nil {
		panic(err)
	}
	for i := range ifaces {
		addr := ifaces[i].HardwareAddr.String()
		if addr == "" {
			continue
		}
		return addr
	}
	panic("no valid network interfaces")
}
// mustHostname returns the host name reported by os.Hostname and panics if it
// cannot be determined.
func mustHostname() string {
	name, err := os.Hostname()
	if err == nil {
		return name
	}
	panic(err)
}