2015-06-30 12:29:30 +00:00
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
2015-07-01 11:17:08 +00:00
import (
2015-07-01 15:56:53 +00:00
"flag"
2015-11-11 15:50:54 +00:00
"fmt"
2016-06-02 13:43:04 +00:00
"io/ioutil"
stdlog "log"
2015-11-26 17:19:46 +00:00
"net"
2015-07-01 11:17:08 +00:00
"net/http"
2015-11-26 17:19:46 +00:00
"net/url"
2015-09-29 09:42:29 +00:00
"os"
"os/signal"
2016-03-30 20:03:54 +00:00
"path"
2016-06-02 13:43:04 +00:00
"sort"
"strconv"
2015-11-11 15:50:54 +00:00
"strings"
2015-09-29 09:42:29 +00:00
"syscall"
2015-11-27 14:41:22 +00:00
"time"
2015-07-01 11:17:08 +00:00
2016-03-03 13:20:21 +00:00
"github.com/prometheus/client_golang/prometheus"
2015-09-28 10:12:27 +00:00
"github.com/prometheus/common/log"
2015-07-01 11:17:08 +00:00
"github.com/prometheus/common/route"
2016-05-15 10:01:12 +00:00
"github.com/prometheus/common/version"
2016-06-02 13:43:04 +00:00
"github.com/weaveworks/mesh"
2015-07-01 15:56:53 +00:00
2015-09-25 16:14:46 +00:00
"github.com/prometheus/alertmanager/config"
2015-09-29 13:02:15 +00:00
"github.com/prometheus/alertmanager/notify"
2016-05-02 09:08:22 +00:00
"github.com/prometheus/alertmanager/provider/boltmem"
2016-06-02 13:43:04 +00:00
meshprov "github.com/prometheus/alertmanager/provider/mesh"
2015-10-11 11:32:24 +00:00
"github.com/prometheus/alertmanager/template"
2015-11-09 13:34:57 +00:00
"github.com/prometheus/alertmanager/types"
2015-07-01 15:56:53 +00:00
)
2016-03-03 13:20:21 +00:00
// Gauges describing the outcome of configuration reloads. Both are registered
// with the default Prometheus registry in init.
var (
	// configSuccess is set to 1 after a reload that succeeded and to 0 after
	// one that failed.
	configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "alertmanager",
		Name:      "config_last_reload_successful",
		Help:      "Whether the last configuration reload attempt was successful.",
	})

	// configSuccessTime holds the Unix time (in seconds) of the most recent
	// reload that succeeded.
	configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "alertmanager",
		Name:      "config_last_reload_success_timestamp_seconds",
		Help:      "Timestamp of the last successful configuration reload.",
	})
)
func init ( ) {
prometheus . MustRegister ( configSuccess )
prometheus . MustRegister ( configSuccessTime )
2016-05-15 10:01:12 +00:00
prometheus . MustRegister ( version . NewCollector ( "alertmanager" ) )
2016-03-03 13:20:21 +00:00
}
2015-06-30 12:29:30 +00:00
func main ( ) {
2016-06-02 13:43:04 +00:00
peers := & stringset { }
var (
showVersion = flag . Bool ( "version" , false , "Print version information." )
configFile = flag . String ( "config.file" , "alertmanager.yml" , "Alertmanager configuration file name." )
dataDir = flag . String ( "storage.path" , "data/" , "Base path for data storage." )
externalURL = flag . String ( "web.external-url" , "" , "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically." )
listenAddress = flag . String ( "web.listen-address" , ":9093" , "Address to listen on for the web interface and API." )
meshListen = flag . String ( "mesh.listen-address" , net . JoinHostPort ( "0.0.0.0" , strconv . Itoa ( mesh . Port ) ) , "mesh listen address" )
hwaddr = flag . String ( "mesh.hardware-address" , mustHardwareAddr ( ) , "MAC address, i.e. mesh peer ID" )
nickname = flag . String ( "mesh.nickname" , mustHostname ( ) , "peer nickname" )
)
flag . Var ( peers , "mesh.peer" , "initial peers (may be repeated)" )
2015-09-29 09:50:59 +00:00
flag . Parse ( )
2015-11-11 15:50:54 +00:00
if * showVersion {
2016-05-15 10:01:12 +00:00
fmt . Fprintln ( os . Stdout , version . Print ( "alertmanager" ) )
2015-11-11 15:50:54 +00:00
os . Exit ( 0 )
}
2016-05-15 10:01:12 +00:00
log . Infoln ( "Starting alertmanager" , version . Info ( ) )
log . Infoln ( "Build context" , version . BuildContext ( ) )
2015-10-29 13:30:58 +00:00
err := os . MkdirAll ( * dataDir , 0777 )
if err != nil {
log . Fatal ( err )
}
2015-10-06 10:36:33 +00:00
2016-06-02 13:43:04 +00:00
mrouter := initMesh ( * meshListen , * hwaddr , * nickname )
ni := meshprov . NewNotificationInfos ( log . Base ( ) )
ni . Register ( mrouter . NewGossip ( "notify_info" , ni ) )
2015-11-09 13:34:57 +00:00
marker := types . NewMarker ( )
2016-06-02 13:43:04 +00:00
silences := meshprov . NewSilences ( marker , log . Base ( ) )
silences . Register ( mrouter . NewGossip ( "silences" , silences ) )
2016-05-02 09:08:22 +00:00
2016-06-02 13:43:04 +00:00
mrouter . Start ( )
defer mrouter . Stop ( )
mrouter . ConnectionMaker . InitiateConnections ( peers . slice ( ) , true )
2016-05-02 09:08:22 +00:00
2016-06-02 13:43:04 +00:00
alerts , err := boltmem . NewAlerts ( * dataDir )
2015-10-06 10:23:48 +00:00
if err != nil {
log . Fatal ( err )
}
2016-06-02 13:43:04 +00:00
defer alerts . Close ( )
2015-09-27 17:50:41 +00:00
2015-10-11 14:54:39 +00:00
var (
inhibitor * Inhibitor
tmpl * template . Template
disp * Dispatcher
)
defer disp . Stop ( )
2015-10-08 09:02:49 +00:00
2015-11-10 13:52:04 +00:00
api := NewAPI ( alerts , silences , func ( ) AlertOverview {
2015-11-07 13:30:21 +00:00
return disp . Groups ( )
2015-11-02 18:41:23 +00:00
} )
2015-11-10 13:08:20 +00:00
build := func ( rcvs [ ] * config . Receiver ) notify . Notifier {
2015-10-11 14:54:39 +00:00
var (
router = notify . Router { }
2015-11-10 13:08:20 +00:00
fanouts = notify . Build ( rcvs , tmpl )
2015-10-11 14:54:39 +00:00
)
for name , fo := range fanouts {
for i , n := range fo {
n = notify . Retry ( n )
n = notify . Log ( n , log . With ( "step" , "retry" ) )
2016-06-02 13:43:04 +00:00
n = notify . Dedup ( ni , n )
2015-10-11 14:54:39 +00:00
n = notify . Log ( n , log . With ( "step" , "dedup" ) )
2016-06-02 17:29:52 +00:00
n = notify . Wait ( meshWait ( mrouter , 5 * time . Second ) , n )
n = notify . Log ( n , log . With ( "step" , "wait" ) )
2015-10-11 14:54:39 +00:00
fo [ i ] = n
}
router [ name ] = fo
}
2015-11-20 14:10:38 +00:00
n := notify . Notifier ( router )
2015-10-08 09:02:49 +00:00
2015-10-11 14:54:39 +00:00
n = notify . Log ( n , log . With ( "step" , "route" ) )
2015-12-03 16:27:36 +00:00
n = notify . Silence ( silences , n , marker )
2015-10-11 14:54:39 +00:00
n = notify . Log ( n , log . With ( "step" , "silence" ) )
2015-12-03 16:27:36 +00:00
n = notify . Inhibit ( inhibitor , n , marker )
2015-10-11 14:54:39 +00:00
n = notify . Log ( n , log . With ( "step" , "inhibit" ) )
2015-10-08 09:02:49 +00:00
2015-10-11 14:54:39 +00:00
return n
2015-09-27 11:18:13 +00:00
}
2015-10-10 13:11:37 +00:00
2016-06-02 13:46:30 +00:00
amURL , err := extURL ( * listenAddress , * externalURL )
2016-03-30 20:03:54 +00:00
if err != nil {
log . Fatal ( err )
}
2015-10-11 14:54:39 +00:00
reload := func ( ) ( err error ) {
log . With ( "file" , * configFile ) . Infof ( "Loading configuration file" )
defer func ( ) {
if err != nil {
2015-10-17 08:02:52 +00:00
log . With ( "file" , * configFile ) . Errorf ( "Loading configuration file failed: %s" , err )
2016-03-03 13:20:21 +00:00
configSuccess . Set ( 0 )
} else {
configSuccess . Set ( 1 )
configSuccessTime . Set ( float64 ( time . Now ( ) . Unix ( ) ) )
2015-10-10 13:11:37 +00:00
}
2015-10-11 14:54:39 +00:00
} ( )
2015-10-10 13:11:37 +00:00
2015-10-11 14:54:39 +00:00
conf , err := config . LoadFile ( * configFile )
if err != nil {
return err
}
2015-10-10 13:11:37 +00:00
2015-11-27 14:41:22 +00:00
api . Update ( conf . String ( ) , time . Duration ( conf . Global . ResolveTimeout ) )
2015-11-02 18:41:23 +00:00
2015-10-11 14:54:39 +00:00
tmpl , err = template . FromGlobs ( conf . Templates ... )
if err != nil {
return err
}
2016-03-30 20:03:54 +00:00
tmpl . ExternalURL = amURL
2015-10-10 13:11:37 +00:00
2016-06-13 13:14:51 +00:00
inhibitor . Stop ( )
2015-10-11 14:54:39 +00:00
disp . Stop ( )
2015-10-10 13:11:37 +00:00
2015-11-09 13:34:57 +00:00
inhibitor = NewInhibitor ( alerts , conf . InhibitRules , marker )
2015-11-10 13:08:20 +00:00
disp = NewDispatcher ( alerts , NewRoute ( conf . Route , nil ) , build ( conf . Receivers ) , marker )
2015-10-10 13:11:37 +00:00
2015-10-11 14:54:39 +00:00
go disp . Run ( )
2016-06-13 13:14:51 +00:00
go inhibitor . Run ( )
2015-10-10 13:11:37 +00:00
2015-10-11 14:54:39 +00:00
return nil
2015-10-10 13:11:37 +00:00
}
2015-10-11 14:54:39 +00:00
if err := reload ( ) ; err != nil {
os . Exit ( 1 )
2015-09-29 10:22:13 +00:00
}
2015-07-01 11:17:08 +00:00
router := route . New ( )
2015-10-12 05:10:25 +00:00
2016-06-12 09:27:21 +00:00
webReload := make ( chan struct { } )
RegisterWeb ( router . WithPrefix ( amURL . Path ) , webReload )
2016-03-30 20:03:54 +00:00
api . Register ( router . WithPrefix ( path . Join ( amURL . Path , "/api" ) ) )
2015-06-30 12:29:30 +00:00
2016-05-15 10:01:12 +00:00
log . Infoln ( "Listening on" , * listenAddress )
2016-06-02 13:46:30 +00:00
go listen ( * listenAddress , router )
2015-09-29 09:42:29 +00:00
2015-09-29 10:22:13 +00:00
var (
2016-06-06 13:42:16 +00:00
hup = make ( chan os . Signal )
hupReady = make ( chan bool )
term = make ( chan os . Signal )
2015-09-29 10:22:13 +00:00
)
signal . Notify ( hup , syscall . SIGHUP )
2015-09-29 09:42:29 +00:00
signal . Notify ( term , os . Interrupt , syscall . SIGTERM )
2015-09-29 10:22:13 +00:00
go func ( ) {
2016-06-06 13:42:16 +00:00
<- hupReady
for {
select {
case <- hup :
2016-06-12 09:27:21 +00:00
case <- webReload :
2016-06-06 13:42:16 +00:00
}
2015-10-11 14:54:39 +00:00
reload ( )
2015-09-29 10:22:13 +00:00
}
} ( )
2016-06-06 13:42:16 +00:00
// Wait for reload or termination signals.
close ( hupReady ) // Unblock SIGHUP handler.
2015-09-29 09:42:29 +00:00
<- term
2015-09-29 09:58:30 +00:00
log . Infoln ( "Received SIGTERM, exiting gracefully..." )
2015-09-29 10:22:13 +00:00
}
2015-11-11 15:50:54 +00:00
2016-06-02 17:29:52 +00:00
// peerDescSlice implements sort.Interface over mesh peer descriptions,
// ordering them by ascending UID.
type peerDescSlice []mesh.PeerDescription

// Len returns the number of peer descriptions.
func (s peerDescSlice) Len() int {
	return len(s)
}

// Less reports whether the peer at index i has a smaller UID than the one at j.
func (s peerDescSlice) Less(i, j int) bool {
	return s[j].UID > s[i].UID
}

// Swap exchanges the peer descriptions at indexes i and j.
func (s peerDescSlice) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
// meshWait returns a function that inspects the current peer state and returns
// a duration of one base timeout for each peer with a higher ID than ourselves.
func meshWait ( r * mesh . Router , timeout time . Duration ) func ( ) time . Duration {
return func ( ) time . Duration {
var peers peerDescSlice
for _ , desc := range r . Peers . Descriptions ( ) {
peers = append ( peers , desc )
}
sort . Sort ( peers )
k := 0
for _ , desc := range peers {
2016-06-06 09:16:26 +00:00
if desc . Self {
2016-06-02 17:29:52 +00:00
break
}
k ++
}
log . Warnf ( "timeout multiplier: %d" , k )
return time . Duration ( k ) * timeout
}
}
2016-06-02 13:43:04 +00:00
// initMesh builds a mesh router that listens on addr, using hwaddr as the
// peer name and nickname as the peer nickname. addr must have the form
// host:port with a numeric port, and hwaddr must be accepted by
// mesh.PeerNameFromString; any violation is reported through log.Fatalf.
// The router's own log output is discarded.
func initMesh(addr, hwaddr, nickname string) *mesh.Router {
	host, rawPort, err := net.SplitHostPort(addr)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	port, err := strconv.Atoi(rawPort)
	if err != nil {
		log.Fatalf("mesh address: %s: %v", addr, err)
	}

	peerName, err := mesh.PeerNameFromString(hwaddr)
	if err != nil {
		log.Fatalf("%s: %v", hwaddr, err)
	}

	cfg := mesh.Config{
		Host:               host,
		Port:               port,
		ProtocolMinVersion: mesh.ProtocolMinVersion,
		Password:           []byte(""),
		ConnLimit:          64,
		PeerDiscovery:      true,
		TrustedSubnets:     []*net.IPNet{},
	}
	// Logger handed to the mesh library; everything it writes is dropped.
	discard := stdlog.New(ioutil.Discard, "", 0)

	return mesh.NewRouter(cfg, peerName, nickname, mesh.NullOverlay{}, discard)
}
2016-06-02 13:46:30 +00:00
// extURL determines the URL under which the server is externally reachable.
//
// When external is empty, the URL is derived as http://<hostname>:<port>/,
// taking the hostname from the operating system and the port from the listen
// address. The path of the resulting URL is normalized: trailing slashes are
// removed and a non-empty path is guaranteed to start with a slash.
//
// An error is returned if the hostname cannot be determined, if listen is not
// a valid host:port pair, or if the URL cannot be parsed.
func extURL(listen, external string) (*url.URL, error) {
	if len(external) == 0 {
		host, err := os.Hostname()
		if err != nil {
			return nil, err
		}
		_, port, err := net.SplitHostPort(listen)
		if err != nil {
			return nil, err
		}

		external = fmt.Sprintf("http://%s:%s/", host, port)
	}

	parsed, err := url.Parse(external)
	if err != nil {
		return nil, err
	}

	// Normalize the path so it can be used directly as a route prefix.
	prefix := strings.TrimRight(parsed.Path, "/")
	switch {
	case prefix == "":
		// Served from the root; nothing to prepend.
	case prefix[0] != '/':
		prefix = "/" + prefix
	}
	parsed.Path = prefix

	return parsed, nil
}
2016-01-28 12:44:40 +00:00
2016-06-02 13:46:30 +00:00
func listen ( listen string , router * route . Router ) {
if err := http . ListenAndServe ( listen , router ) ; err != nil {
2016-01-28 12:44:40 +00:00
log . Fatal ( err )
}
}
2016-06-02 13:43:04 +00:00
// stringset is a set of strings. Its Set and String methods have the
// signatures required by flag.Value, which lets a flag be repeated on the
// command line to collect several distinct values.
type stringset map[string]struct{}

// Set adds value to the set. It never fails.
func (ss stringset) Set(value string) error {
	var present struct{}
	ss[value] = present
	return nil
}

// String renders the members in sorted order, separated by commas.
func (ss stringset) String() string {
	members := ss.slice()
	return strings.Join(members, ",")
}

// slice returns the members as a sorted, non-nil slice.
func (ss stringset) slice() []string {
	members := make([]string, len(ss))
	next := 0
	for member := range ss {
		members[next] = member
		next++
	}
	sort.Strings(members)
	return members
}
// mustHardwareAddr returns the hardware (MAC) address of the first network
// interface, in the order reported by net.Interfaces, that has a non-empty
// one. It panics if the interfaces cannot be listed or if none of them has a
// hardware address.
func mustHardwareAddr() string {
	ifaces, err := net.Interfaces()
	if err != nil {
		panic(err)
	}

	for i := range ifaces {
		addr := ifaces[i].HardwareAddr.String()
		if addr == "" {
			// Interfaces without a hardware address are skipped.
			continue
		}
		return addr
	}

	panic("no valid network interfaces")
}
// mustHostname returns the host name reported by os.Hostname and panics if
// that lookup fails.
func mustHostname() string {
	name, err := os.Hostname()
	if err == nil {
		return name
	}
	panic(err)
}