From fc4b67ce80e23f62daf3447e04ef161008ad005e Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@soundcloud.com>
Date: Fri, 25 Jan 2019 18:13:52 +0100
Subject: [PATCH 1/4] Introduce a timeout and concurrency limit for HTTP
 requests

The default concurrency limit is max(GOMAXPROCS, 8). That should not
imply that each GET requests eats a whole CPU. It's more to get some
reasonable heuristics for the processing power of the hosting machine
(while allowing at least 8 concurrent requests even on the smallest
machines). As GET requests can easily overload the Alertmanager,
rendering it incapable of doing its main task, namely sending alert
notifications, we need to limit GET requests by default.

In contrast, no timeout is set by default. The http.TimeoutHandler
inovkes quite a bit of machinery behind the scenes, in particular an
additional layer of buffering. Thus, we should first get a bit of
experience with it before we consider enforcing a timeout by default,
even if setting a timeout is in general the safer setting for
resiliency.

Signed-off-by: beorn7 <beorn@soundcloud.com>
---
 api/api.go               | 74 +++++++++++++++++++++++++++++++++++++---
 cmd/alertmanager/main.go | 10 +++---
 2 files changed, 76 insertions(+), 8 deletions(-)
diff --git a/api/api.go b/api/api.go
index 7d9d309c..52e249e6 100644
--- a/api/api.go
+++ b/api/api.go
@@ -14,7 +14,9 @@
 package api
 
 import (
+	"fmt"
 	"net/http"
+	"runtime"
 	"time"
 
 	apiv1 "github.com/prometheus/alertmanager/api/v1"
@@ -70,18 +72,39 @@ func New(
 	}, nil
 }
 
-// Register all APIs with the given router and return a mux.
-func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {
+// Register all APIs. It registers APIv1 with the provided router directly. As
+// APIv2 works on the http.Handler level, this method also creates a new
+// http.ServeMux and then uses it to register both the provided router (to
+// handle "/") and APIv2 (to handle "<routePrefix>/api/v2"). The method returns
+// the newly created http.ServeMux.
+//
+// If the provided value for timeout is positive, it is enforced for all HTTP
+// requests. (Negative or zero results in no timeout at all.)
+//
+// If the provided value for concurrency is positive, it limits the number of
+// concurrently processed GET requests. Otherwise, the number of concurrently
+// processed GET requests is limited to GOMAXPROCS or 8, whatever is
+// larger. Status code 503 is served for GET requests that would exceed the
+// concurrency limit.
+func (api *API) Register(
+	r *route.Router, routePrefix string,
+	timeout time.Duration, concurrency int,
+) *http.ServeMux {
+	limiter := makeLimiter(timeout, concurrency)
+
 	api.v1.Register(r.WithPrefix("/api/v1"))
 
 	mux := http.NewServeMux()
-	mux.Handle("/", r)
+	mux.Handle("/", limiter(r))
 
 	apiPrefix := ""
 	if routePrefix != "/" {
 		apiPrefix = routePrefix
 	}
-	mux.Handle(apiPrefix+"/api/v2/", http.StripPrefix(apiPrefix+"/api/v2", api.v2.Handler))
+	mux.Handle(
+		apiPrefix+"/api/v2/",
+		limiter(http.StripPrefix(apiPrefix+"/api/v2", api.v2.Handler)),
+	)
 
 	return mux
 }
@@ -94,3 +117,46 @@ func (api *API) Update(cfg *config.Config, resolveTimeout time.Duration) error {
 
 	return api.v2.Update(cfg, resolveTimeout)
 }
+
+// makeLimiter returns an HTTP middleware that sets a timeout for HTTP requests
+// and also limits the number of concurrently processed GET requests to the
+// given number.
+//
+// If timeout is < 1, no timeout is enforced.
+//
+// If concurrency is < 1, GOMAXPROCS is used as the concurrency limit but at least 8.
+//
+// The returned middleware serves http.StatusServiceUnavailable (503) for requests that
+// would exceed the number.
+func makeLimiter(timeout time.Duration, concurrency int) func(http.Handler) http.Handler {
+	if concurrency < 1 {
+		concurrency = runtime.GOMAXPROCS(0)
+		if concurrency < 8 {
+			concurrency = 8
+		}
+	}
+	inFlightSem := make(chan struct{}, concurrency)
+
+	return func(h http.Handler) http.Handler {
+		concLimiter := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
+			if req.Method == http.MethodGet { // Only limit concurrency of GETs.
+				select {
+				case inFlightSem <- struct{}{}: // All good, carry on.
+					defer func() { <-inFlightSem }()
+				default:
+					http.Error(rsp, fmt.Sprintf(
+						"Limit of concurrent GET requests reached (%d), try again later.\n", concurrency,
+					), http.StatusServiceUnavailable)
+					return
+				}
+			}
+			h.ServeHTTP(rsp, req)
+		})
+		if timeout <= 0 {
+			return concLimiter
+		}
+		return http.TimeoutHandler(concLimiter, timeout, fmt.Sprintf(
+			"Exceeded configured timeout of %v.\n", timeout,
+		))
+	}
+}
diff --git a/cmd/alertmanager/main.go b/cmd/alertmanager/main.go
index 9d1f31c7..b6a649da 100644
--- a/cmd/alertmanager/main.go
+++ b/cmd/alertmanager/main.go
@@ -123,9 +123,11 @@ func run() int {
 		retention       = kingpin.Flag("data.retention", "How long to keep data for.").Default("120h").Duration()
 		alertGCInterval = kingpin.Flag("alerts.gc-interval", "Interval between alert GC.").Default("30m").Duration()
 
-		externalURL   = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
-		routePrefix   = kingpin.Flag("web.route-prefix", "Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url.").String()
-		listenAddress = kingpin.Flag("web.listen-address", "Address to listen on for the web interface and API.").Default(":9093").String()
+		externalURL    = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
+		routePrefix    = kingpin.Flag("web.route-prefix", "Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url.").String()
+		listenAddress  = kingpin.Flag("web.listen-address", "Address to listen on for the web interface and API.").Default(":9093").String()
+		getConcurrency = kingpin.Flag("web.get-concurrency", "Maximum number of GET requests processed concurrently. If negative or zero, the limit is GOMAXPROC or 8, whatever is larger.").Default("0").Int()
+		getTimeout     = kingpin.Flag("web.timeout", "Timeout for HTTP requests. If negative or zero, no timeout is set.").Default("0").Duration()
 
 		clusterBindAddr = kingpin.Flag("cluster.listen-address", "Listen address for cluster.").
 				Default(defaultClusterAddr).String()
@@ -373,7 +375,7 @@ func run() int {
 
 	ui.Register(router, webReload, logger)
 
-	mux := api.Register(router, *routePrefix)
+	mux := api.Register(router, *routePrefix, *getTimeout, *getConcurrency)
 
 	srv := http.Server{Addr: *listenAddress, Handler: mux}
 	srvc := make(chan struct{})

From 4747fd9b2fe76fc9557dcb441e65e0517e56ad97 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@soundcloud.com>
Date: Wed, 6 Feb 2019 18:15:19 +0100
Subject: [PATCH 2/4] Propagate timeout to alert listing via context

The context is created by the http.TimeoutHandler we use to set the
timeout.

I believe this is the only endpoint where propagating the timeout is
feasible and needed.

Signed-off-by: beorn7 <beorn@soundcloud.com>
---
 api/v1/api.go | 5 ++++-
 api/v2/api.go | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/api/v1/api.go b/api/v1/api.go
index a8e2cc52..221781c3 100644
--- a/api/v1/api.go
+++ b/api/v1/api.go
@@ -253,6 +253,7 @@ func (api *API) listAlerts(w http.ResponseWriter, r *http.Request) {
 		// are no alerts present
 		res      = []*Alert{}
 		matchers = []*labels.Matcher{}
+		ctx      = r.Context()
 
 		showActive, showInhibited     bool
 		showSilenced, showUnprocessed bool
@@ -326,11 +327,13 @@ func (api *API) listAlerts(w http.ResponseWriter, r *http.Request) {
 	defer alerts.Close()
 
 	api.mtx.RLock()
-	// TODO(fabxc): enforce a sensible timeout.
 	for a := range alerts.Next() {
 		if err = alerts.Err(); err != nil {
 			break
 		}
+		if err = ctx.Err(); err != nil {
+			break
+		}
 
 		routes := api.route.Match(a.Labels)
 		receivers := make([]string, 0, len(routes))
diff --git a/api/v2/api.go b/api/v2/api.go
index b0e619ed..ae633342 100644
--- a/api/v2/api.go
+++ b/api/v2/api.go
@@ -195,6 +195,7 @@ func (api *API) getAlertsHandler(params alert_ops.GetAlertsParams) middleware.Re
 		// are no alerts present
 		res      = open_api_models.GettableAlerts{}
 		matchers = []*labels.Matcher{}
+		ctx      = params.HTTPRequest.Context()
 	)
 
 	if params.Filter != nil {
@@ -224,11 +225,13 @@ func (api *API) getAlertsHandler(params alert_ops.GetAlertsParams) middleware.Re
 	defer alerts.Close()
 
 	api.mtx.RLock()
-	// TODO(fabxc): enforce a sensible timeout.
 	for a := range alerts.Next() {
 		if err = alerts.Err(); err != nil {
 			break
 		}
+		if err = ctx.Err(); err != nil {
+			break
+		}
 
 		routes := api.route.Match(a.Labels)
 		receivers := make([]*open_api_models.Receiver, 0, len(routes))

From 3382a0e949267fe902de17ca0d0e1cd58b9d3c80 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@soundcloud.com>
Date: Thu, 7 Feb 2019 15:53:56 +0100
Subject: [PATCH 3/4] Add HTTP instrumentation for GET requests in flight

While the newly added in-flight instrumentation works for all GET
requests, the existing HTTP instrumentation omits api/v2 calls. This
commit adds a TODO note about that.

Signed-off-by: beorn7 <beorn@soundcloud.com>
---
 api/api.go               | 22 +++++++++++++++++++++-
 cmd/alertmanager/main.go |  5 +++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/api/api.go b/api/api.go
index 52e249e6..8241f730 100644
--- a/api/api.go
+++ b/api/api.go
@@ -26,12 +26,24 @@ import (
 	"github.com/prometheus/alertmanager/provider"
 	"github.com/prometheus/alertmanager/silence"
 	"github.com/prometheus/alertmanager/types"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/common/route"
 
 	"github.com/go-kit/kit/log"
 )
 
+var (
+	requestsInFlight = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "alertmanager_http_get_requests_in_flight",
+		Help: "Current number of HTTP GET requests being processed.",
+	})
+)
+
+func init() {
+	prometheus.MustRegister(requestsInFlight)
+}
+
 // API represents all APIs of Alertmanager.
 type API struct {
 	v1 *apiv1.API
@@ -101,6 +113,10 @@ func (api *API) Register(
 	if routePrefix != "/" {
 		apiPrefix = routePrefix
 	}
+	// TODO(beorn7): HTTP instrumentation is only in place for Router. Since
+	// /api/v2 works on the Handler level, it is currently not instrumented
+	// at all (with the exception of requestsInFlight, which is handled in
+	// the Limiter below).
 	mux.Handle(
 		apiPrefix+"/api/v2/",
 		limiter(http.StripPrefix(apiPrefix+"/api/v2", api.v2.Handler)),
@@ -142,7 +158,11 @@ func makeLimiter(timeout time.Duration, concurrency int) func(http.Handler) http
 			if req.Method == http.MethodGet { // Only limit concurrency of GETs.
 				select {
 				case inFlightSem <- struct{}{}: // All good, carry on.
-					defer func() { <-inFlightSem }()
+					requestsInFlight.Inc()
+					defer func() {
+						<-inFlightSem
+						requestsInFlight.Dec()
+					}()
 				default:
 					http.Error(rsp, fmt.Sprintf(
 						"Limit of concurrent GET requests reached (%d), try again later.\n", concurrency,
diff --git a/cmd/alertmanager/main.go b/cmd/alertmanager/main.go
index b6a649da..a3784d4d 100644
--- a/cmd/alertmanager/main.go
+++ b/cmd/alertmanager/main.go
@@ -96,10 +96,11 @@ func init() {
 }
 
 func instrumentHandler(handlerName string, handler http.HandlerFunc) http.HandlerFunc {
+	handlerLabel := prometheus.Labels{"handler": handlerName}
 	return promhttp.InstrumentHandlerDuration(
-		requestDuration.MustCurryWith(prometheus.Labels{"handler": handlerName}),
+		requestDuration.MustCurryWith(handlerLabel),
 		promhttp.InstrumentHandlerResponseSize(
-			responseSize.MustCurryWith(prometheus.Labels{"handler": handlerName}),
+			responseSize.MustCurryWith(handlerLabel),
 			handler,
 		),
 	)

From 21de9ff88c7f2b312139715da676a7a71d084791 Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@soundcloud.com>
Date: Tue, 12 Feb 2019 17:42:11 +0100
Subject: [PATCH 4/4] Various improvements after code review

Most importantly, `api.New` now takes an `Options` struct as an
argument, which allows some other things done here as well:

- Timout and concurrency limit are now in the options, streamlining
  the registration and the implementation of the limiting middleware.

- A local registry is used for metrics, and the metrics used so far
  inside any of the api packages are using it now.

The 'in flight' metric now contains the 'get' as a method label. I
have also added a TODO to instrument other methods in the same way
(otherwise, the label doesn't reall make sense, semantically). I have
also added an explicit error counter for requests rejected because of
the concurrency limit. (They also show up as 503s in the generic HTTP
instrumentation (or they would, if v2 were instrumented, too), but
those 503s might have a number of reasons, while users might want to
alert on concurrency limit problems explicitly).

Signed-off-by: beorn7 <beorn@soundcloud.com>
---
 api/api.go               | 221 +++++++++++++++++++++++----------------
 api/v1/api.go            |  63 +++++------
 api/v1/api_test.go       |   4 +-
 cmd/alertmanager/main.go |  23 ++--
 4 files changed, 177 insertions(+), 134 deletions(-)

diff --git a/api/api.go b/api/api.go
index 8241f730..bdebb177 100644
--- a/api/api.go
+++ b/api/api.go
@@ -14,6 +14,7 @@
 package api
 
 import (
+	"errors"
 	"fmt"
 	"net/http"
 	"runtime"
@@ -33,44 +34,87 @@ import (
 	"github.com/go-kit/kit/log"
 )
 
-var (
-	requestsInFlight = prometheus.NewGauge(prometheus.GaugeOpts{
-		Name: "alertmanager_http_get_requests_in_flight",
-		Help: "Current number of HTTP GET requests being processed.",
-	})
-)
-
-func init() {
-	prometheus.MustRegister(requestsInFlight)
-}
-
 // API represents all APIs of Alertmanager.
 type API struct {
-	v1 *apiv1.API
-	v2 *apiv2.API
+	v1                       *apiv1.API
+	v2                       *apiv2.API
+	requestsInFlight         prometheus.Gauge
+	concurrencyLimitExceeded prometheus.Counter
+	timeout                  time.Duration
+	inFlightSem              chan struct{}
+}
+
+// Options for the creation of an API object. Alerts, Silences, and StatusFunc
+// are mandatory to set. The zero value for everything else is a safe default.
+type Options struct {
+	// Alerts to be used by the API. Mandatory.
+	Alerts provider.Alerts
+	// Silences to be used by the API. Mandatory.
+	Silences *silence.Silences
+	// StatusFunc is used be the API to retrieve the AlertStatus of an
+	// alert. Mandatory.
+	StatusFunc func(model.Fingerprint) types.AlertStatus
+	// Peer from the gossip cluster. If nil, no clustering will be used.
+	Peer *cluster.Peer
+	// Timeout for all HTTP connections. The zero value (and negative
+	// values) result in no timeout.
+	Timeout time.Duration
+	// Concurrency limit for GET requests. The zero value (and negative
+	// values) result in a limit of GOMAXPROCS or 8, whichever is
+	// larger. Status code 503 is served for GET requests that would exceed
+	// the concurrency limit.
+	Concurrency int
+	// Logger is used for logging, if nil, no logging will happen.
+	Logger log.Logger
+	// Registry is used to register Prometheus metrics. If nil, no metrics
+	// registration will happen.
+	Registry prometheus.Registerer
+}
+
+func (o Options) validate() error {
+	if o.Alerts == nil {
+		return errors.New("mandatory field Alerts not set")
+	}
+	if o.Silences == nil {
+		return errors.New("mandatory field Silences not set")
+	}
+	if o.StatusFunc == nil {
+		return errors.New("mandatory field StatusFunc not set")
+	}
+	return nil
 }
 
 // New creates a new API object combining all API versions.
-func New(
-	alerts provider.Alerts,
-	silences *silence.Silences,
-	sf func(model.Fingerprint) types.AlertStatus,
-	peer *cluster.Peer,
-	l log.Logger,
-) (*API, error) {
+func New(opts Options) (*API, error) {
+	if err := opts.validate(); err != nil {
+		return nil, fmt.Errorf("invalid API options: %s", err)
+	}
+	l := opts.Logger
+	if l == nil {
+		l = log.NewNopLogger()
+	}
+	concurrency := opts.Concurrency
+	if concurrency < 1 {
+		concurrency = runtime.GOMAXPROCS(0)
+		if concurrency < 8 {
+			concurrency = 8
+		}
+	}
+
 	v1 := apiv1.New(
-		alerts,
-		silences,
-		sf,
-		peer,
+		opts.Alerts,
+		opts.Silences,
+		opts.StatusFunc,
+		opts.Peer,
 		log.With(l, "version", "v1"),
+		opts.Registry,
 	)
 
 	v2, err := apiv2.NewAPI(
-		alerts,
-		sf,
-		silences,
-		peer,
+		opts.Alerts,
+		opts.StatusFunc,
+		opts.Silences,
+		opts.Peer,
 		log.With(l, "version", "v2"),
 	)
 
@@ -78,9 +122,34 @@ func New(
 		return nil, err
 	}
 
+	// TODO(beorn7): For now, this hardcodes the method="get" label. Other
+	// methods should get the same instrumentation.
+	requestsInFlight := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name:        "alertmanager_http_requests_in_flight",
+		Help:        "Current number of HTTP requests being processed.",
+		ConstLabels: prometheus.Labels{"method": "get"},
+	})
+	concurrencyLimitExceeded := prometheus.NewCounter(prometheus.CounterOpts{
+		Name:        "alertmanager_http_concurrency_limit_exceeded_total",
+		Help:        "Total number of times an HTTP request failed because the concurrency limit was reached.",
+		ConstLabels: prometheus.Labels{"method": "get"},
+	})
+	if opts.Registry != nil {
+		if err := opts.Registry.Register(requestsInFlight); err != nil {
+			return nil, err
+		}
+		if err := opts.Registry.Register(concurrencyLimitExceeded); err != nil {
+			return nil, err
+		}
+	}
+
 	return &API{
-		v1: v1,
-		v2: v2,
+		v1:                       v1,
+		v2:                       v2,
+		requestsInFlight:         requestsInFlight,
+		concurrencyLimitExceeded: concurrencyLimitExceeded,
+		timeout:                  opts.Timeout,
+		inFlightSem:              make(chan struct{}, concurrency),
 	}, nil
 }
 
@@ -88,26 +157,15 @@ func New(
 // APIv2 works on the http.Handler level, this method also creates a new
 // http.ServeMux and then uses it to register both the provided router (to
 // handle "/") and APIv2 (to handle "<routePrefix>/api/v2"). The method returns
-// the newly created http.ServeMux.
-//
-// If the provided value for timeout is positive, it is enforced for all HTTP
-// requests. (Negative or zero results in no timeout at all.)
-//
-// If the provided value for concurrency is positive, it limits the number of
-// concurrently processed GET requests. Otherwise, the number of concurrently
-// processed GET requests is limited to GOMAXPROCS or 8, whatever is
-// larger. Status code 503 is served for GET requests that would exceed the
-// concurrency limit.
-func (api *API) Register(
-	r *route.Router, routePrefix string,
-	timeout time.Duration, concurrency int,
-) *http.ServeMux {
-	limiter := makeLimiter(timeout, concurrency)
-
+// the newly created http.ServeMux. If a timeout has been set on construction of
+// API, it is enforced for all HTTP request going through this mux. The same is
+// true for the concurrency limit, with the exception that it is only applied to
+// GET requests.
+func (api *API) Register(r *route.Router, routePrefix string) *http.ServeMux {
 	api.v1.Register(r.WithPrefix("/api/v1"))
 
 	mux := http.NewServeMux()
-	mux.Handle("/", limiter(r))
+	mux.Handle("/", api.limitHandler(r))
 
 	apiPrefix := ""
 	if routePrefix != "/" {
@@ -116,10 +174,10 @@ func (api *API) Register(
 	// TODO(beorn7): HTTP instrumentation is only in place for Router. Since
 	// /api/v2 works on the Handler level, it is currently not instrumented
 	// at all (with the exception of requestsInFlight, which is handled in
-	// the Limiter below).
+	// limitHandler below).
 	mux.Handle(
 		apiPrefix+"/api/v2/",
-		limiter(http.StripPrefix(apiPrefix+"/api/v2", api.v2.Handler)),
+		api.limitHandler(http.StripPrefix(apiPrefix+"/api/v2", api.v2.Handler)),
 	)
 
 	return mux
@@ -134,49 +192,30 @@ func (api *API) Update(cfg *config.Config, resolveTimeout time.Duration) error {
 	return api.v2.Update(cfg, resolveTimeout)
 }
 
-// makeLimiter returns an HTTP middleware that sets a timeout for HTTP requests
-// and also limits the number of concurrently processed GET requests to the
-// given number.
-//
-// If timeout is < 1, no timeout is enforced.
-//
-// If concurrency is < 1, GOMAXPROCS is used as the concurrency limit but at least 8.
-//
-// The returned middleware serves http.StatusServiceUnavailable (503) for requests that
-// would exceed the number.
-func makeLimiter(timeout time.Duration, concurrency int) func(http.Handler) http.Handler {
-	if concurrency < 1 {
-		concurrency = runtime.GOMAXPROCS(0)
-		if concurrency < 8 {
-			concurrency = 8
-		}
-	}
-	inFlightSem := make(chan struct{}, concurrency)
-
-	return func(h http.Handler) http.Handler {
-		concLimiter := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
-			if req.Method == http.MethodGet { // Only limit concurrency of GETs.
-				select {
-				case inFlightSem <- struct{}{}: // All good, carry on.
-					requestsInFlight.Inc()
-					defer func() {
-						<-inFlightSem
-						requestsInFlight.Dec()
-					}()
-				default:
-					http.Error(rsp, fmt.Sprintf(
-						"Limit of concurrent GET requests reached (%d), try again later.\n", concurrency,
-					), http.StatusServiceUnavailable)
-					return
-				}
+func (api *API) limitHandler(h http.Handler) http.Handler {
+	concLimiter := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
+		if req.Method == http.MethodGet { // Only limit concurrency of GETs.
+			select {
+			case api.inFlightSem <- struct{}{}: // All good, carry on.
+				api.requestsInFlight.Inc()
+				defer func() {
+					<-api.inFlightSem
+					api.requestsInFlight.Dec()
+				}()
+			default:
+				api.concurrencyLimitExceeded.Inc()
+				http.Error(rsp, fmt.Sprintf(
+					"Limit of concurrent GET requests reached (%d), try again later.\n", cap(api.inFlightSem),
+				), http.StatusServiceUnavailable)
+				return
 			}
-			h.ServeHTTP(rsp, req)
-		})
-		if timeout <= 0 {
-			return concLimiter
 		}
-		return http.TimeoutHandler(concLimiter, timeout, fmt.Sprintf(
-			"Exceeded configured timeout of %v.\n", timeout,
-		))
+		h.ServeHTTP(rsp, req)
+	})
+	if api.timeout <= 0 {
+		return concLimiter
 	}
+	return http.TimeoutHandler(concLimiter, api.timeout, fmt.Sprintf(
+		"Exceeded configured timeout of %v.\n", api.timeout,
+	))
 }
diff --git a/api/v1/api.go b/api/v1/api.go
index 221781c3..56245996 100644
--- a/api/v1/api.go
+++ b/api/v1/api.go
@@ -41,28 +41,6 @@ import (
 	"github.com/prometheus/alertmanager/types"
 )
 
-var (
-	numReceivedAlerts = prometheus.NewCounterVec(prometheus.CounterOpts{
-		Namespace: "alertmanager",
-		Name:      "alerts_received_total",
-		Help:      "The total number of received alerts.",
-	}, []string{"status"})
-
-	numInvalidAlerts = prometheus.NewCounter(prometheus.CounterOpts{
-		Namespace: "alertmanager",
-		Name:      "alerts_invalid_total",
-		Help:      "The total number of received alerts that were invalid.",
-	})
-)
-
-func init() {
-	numReceivedAlerts.WithLabelValues("firing")
-	numReceivedAlerts.WithLabelValues("resolved")
-
-	prometheus.MustRegister(numReceivedAlerts)
-	prometheus.MustRegister(numInvalidAlerts)
-}
-
 var corsHeaders = map[string]string{
 	"Access-Control-Allow-Headers":  "Accept, Authorization, Content-Type, Origin",
 	"Access-Control-Allow-Methods":  "GET, DELETE, OPTIONS",
@@ -98,6 +76,9 @@ type API struct {
 	peer           *cluster.Peer
 	logger         log.Logger
 
+	numReceivedAlerts *prometheus.CounterVec
+	numInvalidAlerts  prometheus.Counter
+
 	getAlertStatus getAlertStatusFn
 
 	mtx sync.RWMutex
@@ -112,18 +93,38 @@ func New(
 	sf getAlertStatusFn,
 	peer *cluster.Peer,
 	l log.Logger,
+	r prometheus.Registerer,
 ) *API {
 	if l == nil {
 		l = log.NewNopLogger()
 	}
 
+	numReceivedAlerts := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "alertmanager",
+		Name:      "alerts_received_total",
+		Help:      "The total number of received alerts.",
+	}, []string{"status"})
+
+	numInvalidAlerts := prometheus.NewCounter(prometheus.CounterOpts{
+		Namespace: "alertmanager",
+		Name:      "alerts_invalid_total",
+		Help:      "The total number of received alerts that were invalid.",
+	})
+	numReceivedAlerts.WithLabelValues("firing")
+	numReceivedAlerts.WithLabelValues("resolved")
+	if r != nil {
+		r.MustRegister(numReceivedAlerts, numInvalidAlerts)
+	}
+
 	return &API{
-		alerts:         alerts,
-		silences:       silences,
-		getAlertStatus: sf,
-		uptime:         time.Now(),
-		peer:           peer,
-		logger:         l,
+		alerts:            alerts,
+		silences:          silences,
+		getAlertStatus:    sf,
+		uptime:            time.Now(),
+		peer:              peer,
+		logger:            l,
+		numReceivedAlerts: numReceivedAlerts,
+		numInvalidAlerts:  numInvalidAlerts,
 	}
 }
 
@@ -452,9 +453,9 @@ func (api *API) insertAlerts(w http.ResponseWriter, r *http.Request, alerts ...*
 			alert.EndsAt = now.Add(resolveTimeout)
 		}
 		if alert.EndsAt.After(time.Now()) {
-			numReceivedAlerts.WithLabelValues("firing").Inc()
+			api.numReceivedAlerts.WithLabelValues("firing").Inc()
 		} else {
-			numReceivedAlerts.WithLabelValues("resolved").Inc()
+			api.numReceivedAlerts.WithLabelValues("resolved").Inc()
 		}
 	}
 
@@ -468,7 +469,7 @@ func (api *API) insertAlerts(w http.ResponseWriter, r *http.Request, alerts ...*
 
 		if err := a.Validate(); err != nil {
 			validationErrs.Add(err)
-			numInvalidAlerts.Inc()
+			api.numInvalidAlerts.Inc()
 			continue
 		}
 		validAlerts = append(validAlerts, a)
diff --git a/api/v1/api_test.go b/api/v1/api_test.go
index a2991ab4..b83ea007 100644
--- a/api/v1/api_test.go
+++ b/api/v1/api_test.go
@@ -132,7 +132,7 @@ func TestAddAlerts(t *testing.T) {
 		}
 
 		alertsProvider := newFakeAlerts([]*types.Alert{}, tc.err)
-		api := New(alertsProvider, nil, newGetAlertStatus(alertsProvider), nil, nil)
+		api := New(alertsProvider, nil, newGetAlertStatus(alertsProvider), nil, nil, nil)
 
 		r, err := http.NewRequest("POST", "/api/v1/alerts", bytes.NewReader(b))
 		w := httptest.NewRecorder()
@@ -259,7 +259,7 @@ func TestListAlerts(t *testing.T) {
 		},
 	} {
 		alertsProvider := newFakeAlerts(alerts, tc.err)
-		api := New(alertsProvider, nil, newGetAlertStatus(alertsProvider), nil, nil)
+		api := New(alertsProvider, nil, newGetAlertStatus(alertsProvider), nil, nil, nil)
 		api.route = dispatch.NewRoute(&config.Route{Receiver: "def-receiver"}, nil)
 
 		r, err := http.NewRequest("GET", "/api/v1/alerts", nil)
diff --git a/cmd/alertmanager/main.go b/cmd/alertmanager/main.go
index a3784d4d..b799edd7 100644
--- a/cmd/alertmanager/main.go
+++ b/cmd/alertmanager/main.go
@@ -127,8 +127,8 @@ func run() int {
 		externalURL    = kingpin.Flag("web.external-url", "The URL under which Alertmanager is externally reachable (for example, if Alertmanager is served via a reverse proxy). Used for generating relative and absolute links back to Alertmanager itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Alertmanager. If omitted, relevant URL components will be derived automatically.").String()
 		routePrefix    = kingpin.Flag("web.route-prefix", "Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url.").String()
 		listenAddress  = kingpin.Flag("web.listen-address", "Address to listen on for the web interface and API.").Default(":9093").String()
-		getConcurrency = kingpin.Flag("web.get-concurrency", "Maximum number of GET requests processed concurrently. If negative or zero, the limit is GOMAXPROC or 8, whatever is larger.").Default("0").Int()
-		getTimeout     = kingpin.Flag("web.timeout", "Timeout for HTTP requests. If negative or zero, no timeout is set.").Default("0").Duration()
+		getConcurrency = kingpin.Flag("web.get-concurrency", "Maximum number of GET requests processed concurrently. If negative or zero, the limit is GOMAXPROC or 8, whichever is larger.").Default("0").Int()
+		httpTimeout    = kingpin.Flag("web.timeout", "Timeout for HTTP requests. If negative or zero, no timeout is set.").Default("0").Duration()
 
 		clusterBindAddr = kingpin.Flag("cluster.listen-address", "Listen address for cluster.").
 				Default(defaultClusterAddr).String()
@@ -270,13 +270,16 @@ func run() int {
 	)
 	defer disp.Stop()
 
-	api, err := api.New(
-		alerts,
-		silences,
-		marker.Status,
-		peer,
-		log.With(logger, "component", "api"),
-	)
+	api, err := api.New(api.Options{
+		Alerts:      alerts,
+		Silences:    silences,
+		StatusFunc:  marker.Status,
+		Peer:        peer,
+		Timeout:     *httpTimeout,
+		Concurrency: *getConcurrency,
+		Logger:      log.With(logger, "component", "api"),
+		Registry:    prometheus.DefaultRegisterer,
+	})
 
 	if err != nil {
 		level.Error(logger).Log("err", fmt.Errorf("failed to create API: %v", err.Error()))
@@ -376,7 +379,7 @@ func run() int {
 
 	ui.Register(router, webReload, logger)
 
-	mux := api.Register(router, *routePrefix, *getTimeout, *getConcurrency)
+	mux := api.Register(router, *routePrefix)
 
 	srv := http.Server{Addr: *listenAddress, Handler: mux}
 	srvc := make(chan struct{})