mirror of
https://github.com/prometheus/prometheus
synced 2024-12-29 18:22:16 +00:00
b3a39ccd8a
Right now the /alerts page of Prometheus sorts alerts by severity (firing, pending, inactive). Once multiple alerts have the same severity, their order seems to correlate to how they are placed in the configuration files, but not always. Looking at the code, we make use of sort.Sort(), which is documented not to provide a stable sort. The Less() function also only takes the alert state into account. This change extends the Less() function to provide a lexicographic order on both the alert state and the name. This means I can finally find the alerts I'm looking for without using my browser's search feature.
561 lines
16 KiB
Go
561 lines
16 KiB
Go
// Copyright 2013 The Prometheus Authors
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package web
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
pprof_runtime "runtime/pprof"
|
|
template_text "text/template"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/common/log"
|
|
"github.com/prometheus/common/model"
|
|
"github.com/prometheus/common/route"
|
|
"golang.org/x/net/context"
|
|
"golang.org/x/net/netutil"
|
|
|
|
"github.com/prometheus/prometheus/config"
|
|
"github.com/prometheus/prometheus/notifier"
|
|
"github.com/prometheus/prometheus/promql"
|
|
"github.com/prometheus/prometheus/retrieval"
|
|
"github.com/prometheus/prometheus/rules"
|
|
"github.com/prometheus/prometheus/storage/local"
|
|
"github.com/prometheus/prometheus/template"
|
|
"github.com/prometheus/prometheus/util/httputil"
|
|
api_v1 "github.com/prometheus/prometheus/web/api/v1"
|
|
"github.com/prometheus/prometheus/web/ui"
|
|
)
|
|
|
|
var localhostRepresentations = []string{"127.0.0.1", "localhost"}
|
|
|
|
// Handler serves various HTTP endpoints of the Prometheus server
|
|
type Handler struct {
|
|
targetManager *retrieval.TargetManager
|
|
ruleManager *rules.Manager
|
|
queryEngine *promql.Engine
|
|
context context.Context
|
|
storage local.Storage
|
|
notifier *notifier.Notifier
|
|
|
|
apiV1 *api_v1.API
|
|
|
|
router *route.Router
|
|
listenErrCh chan error
|
|
quitCh chan struct{}
|
|
reloadCh chan chan error
|
|
options *Options
|
|
configString string
|
|
versionInfo *PrometheusVersion
|
|
birth time.Time
|
|
cwd string
|
|
flagsMap map[string]string
|
|
|
|
externalLabels model.LabelSet
|
|
mtx sync.RWMutex
|
|
now func() model.Time
|
|
}
|
|
|
|
// ApplyConfig updates the status state as the new config requires.
|
|
func (h *Handler) ApplyConfig(conf *config.Config) error {
|
|
h.mtx.Lock()
|
|
defer h.mtx.Unlock()
|
|
|
|
h.externalLabels = conf.GlobalConfig.ExternalLabels
|
|
h.configString = conf.String()
|
|
|
|
return nil
|
|
}
|
|
|
|
// PrometheusVersion contains build information about Prometheus.
|
|
type PrometheusVersion struct {
|
|
Version string `json:"version"`
|
|
Revision string `json:"revision"`
|
|
Branch string `json:"branch"`
|
|
BuildUser string `json:"buildUser"`
|
|
BuildDate string `json:"buildDate"`
|
|
GoVersion string `json:"goVersion"`
|
|
}
|
|
|
|
// Options for the web Handler.
|
|
type Options struct {
|
|
Context context.Context
|
|
Storage local.Storage
|
|
QueryEngine *promql.Engine
|
|
TargetManager *retrieval.TargetManager
|
|
RuleManager *rules.Manager
|
|
Notifier *notifier.Notifier
|
|
Version *PrometheusVersion
|
|
Flags map[string]string
|
|
|
|
ListenAddress string
|
|
ReadTimeout time.Duration
|
|
MaxConnections int
|
|
ExternalURL *url.URL
|
|
RoutePrefix string
|
|
MetricsPath string
|
|
UseLocalAssets bool
|
|
UserAssetsPath string
|
|
ConsoleTemplatesPath string
|
|
ConsoleLibrariesPath string
|
|
EnableQuit bool
|
|
}
|
|
|
|
// New initializes a new web Handler.
|
|
func New(o *Options) *Handler {
|
|
router := route.New(func(r *http.Request) (context.Context, error) {
|
|
return o.Context, nil
|
|
})
|
|
|
|
cwd, err := os.Getwd()
|
|
|
|
if err != nil {
|
|
cwd = "<error retrieving current working directory>"
|
|
}
|
|
|
|
h := &Handler{
|
|
router: router,
|
|
listenErrCh: make(chan error),
|
|
quitCh: make(chan struct{}),
|
|
reloadCh: make(chan chan error),
|
|
options: o,
|
|
versionInfo: o.Version,
|
|
birth: time.Now(),
|
|
cwd: cwd,
|
|
flagsMap: o.Flags,
|
|
|
|
context: o.Context,
|
|
targetManager: o.TargetManager,
|
|
ruleManager: o.RuleManager,
|
|
queryEngine: o.QueryEngine,
|
|
storage: o.Storage,
|
|
notifier: o.Notifier,
|
|
|
|
apiV1: api_v1.NewAPI(o.QueryEngine, o.Storage, o.TargetManager),
|
|
now: model.Now,
|
|
}
|
|
|
|
if o.RoutePrefix != "/" {
|
|
// If the prefix is missing for the root path, prepend it.
|
|
router.Get("/", func(w http.ResponseWriter, r *http.Request) {
|
|
http.Redirect(w, r, o.RoutePrefix, http.StatusFound)
|
|
})
|
|
router = router.WithPrefix(o.RoutePrefix)
|
|
}
|
|
|
|
instrh := prometheus.InstrumentHandler
|
|
instrf := prometheus.InstrumentHandlerFunc
|
|
|
|
router.Get("/", func(w http.ResponseWriter, r *http.Request) {
|
|
router.Redirect(w, r, "/graph", http.StatusFound)
|
|
})
|
|
|
|
router.Get("/alerts", instrf("alerts", h.alerts))
|
|
router.Get("/graph", instrf("graph", h.graph))
|
|
router.Get("/status", instrf("status", h.status))
|
|
router.Get("/flags", instrf("flags", h.flags))
|
|
router.Get("/config", instrf("config", h.config))
|
|
router.Get("/rules", instrf("rules", h.rules))
|
|
router.Get("/targets", instrf("targets", h.targets))
|
|
router.Get("/version", instrf("version", h.version))
|
|
|
|
router.Get("/heap", instrf("heap", dumpHeap))
|
|
|
|
router.Get(o.MetricsPath, prometheus.Handler().ServeHTTP)
|
|
|
|
router.Get("/federate", instrh("federate", httputil.CompressionHandler{
|
|
Handler: http.HandlerFunc(h.federation),
|
|
}))
|
|
|
|
h.apiV1.Register(router.WithPrefix("/api/v1"))
|
|
|
|
router.Get("/consoles/*filepath", instrf("consoles", h.consoles))
|
|
|
|
router.Get("/static/*filepath", instrf("static", serveStaticAsset))
|
|
|
|
if o.UserAssetsPath != "" {
|
|
router.Get("/user/*filepath", instrf("user", route.FileServe(o.UserAssetsPath)))
|
|
}
|
|
|
|
if o.EnableQuit {
|
|
router.Post("/-/quit", h.quit)
|
|
}
|
|
|
|
router.Post("/-/reload", h.reload)
|
|
router.Get("/-/reload", func(w http.ResponseWriter, r *http.Request) {
|
|
w.WriteHeader(http.StatusMethodNotAllowed)
|
|
fmt.Fprintf(w, "This endpoint requires a POST request.\n")
|
|
})
|
|
|
|
router.Get("/debug/*subpath", http.DefaultServeMux.ServeHTTP)
|
|
router.Post("/debug/*subpath", http.DefaultServeMux.ServeHTTP)
|
|
|
|
return h
|
|
}
|
|
|
|
func serveStaticAsset(w http.ResponseWriter, req *http.Request) {
|
|
fp := route.Param(route.Context(req), "filepath")
|
|
fp = filepath.Join("web/ui/static", fp)
|
|
|
|
info, err := ui.AssetInfo(fp)
|
|
if err != nil {
|
|
log.With("file", fp).Warn("Could not get file info: ", err)
|
|
w.WriteHeader(http.StatusNotFound)
|
|
return
|
|
}
|
|
file, err := ui.Asset(fp)
|
|
if err != nil {
|
|
if err != io.EOF {
|
|
log.With("file", fp).Warn("Could not get file: ", err)
|
|
}
|
|
w.WriteHeader(http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
http.ServeContent(w, req, info.Name(), info.ModTime(), bytes.NewReader(file))
|
|
}
|
|
|
|
// ListenError returns the receive-only channel that signals errors while starting the web server.
|
|
func (h *Handler) ListenError() <-chan error {
|
|
return h.listenErrCh
|
|
}
|
|
|
|
// Quit returns the receive-only quit channel.
|
|
func (h *Handler) Quit() <-chan struct{} {
|
|
return h.quitCh
|
|
}
|
|
|
|
// Reload returns the receive-only channel that signals configuration reload requests.
|
|
func (h *Handler) Reload() <-chan chan error {
|
|
return h.reloadCh
|
|
}
|
|
|
|
// Run serves the HTTP endpoints.
|
|
func (h *Handler) Run() {
|
|
log.Infof("Listening on %s", h.options.ListenAddress)
|
|
server := &http.Server{
|
|
Addr: h.options.ListenAddress,
|
|
Handler: h.router,
|
|
ErrorLog: log.NewErrorLogger(),
|
|
ReadTimeout: h.options.ReadTimeout,
|
|
}
|
|
listener, err := net.Listen("tcp", h.options.ListenAddress)
|
|
if err != nil {
|
|
h.listenErrCh <- err
|
|
} else {
|
|
limitedListener := netutil.LimitListener(listener, h.options.MaxConnections)
|
|
h.listenErrCh <- server.Serve(limitedListener)
|
|
}
|
|
}
|
|
|
|
func (h *Handler) alerts(w http.ResponseWriter, r *http.Request) {
|
|
alerts := h.ruleManager.AlertingRules()
|
|
alertsSorter := byAlertStateAndNameSorter{alerts: alerts}
|
|
sort.Sort(alertsSorter)
|
|
|
|
alertStatus := AlertStatus{
|
|
AlertingRules: alertsSorter.alerts,
|
|
AlertStateToRowClass: map[rules.AlertState]string{
|
|
rules.StateInactive: "success",
|
|
rules.StatePending: "warning",
|
|
rules.StateFiring: "danger",
|
|
},
|
|
}
|
|
h.executeTemplate(w, "alerts.html", alertStatus)
|
|
}
|
|
|
|
func (h *Handler) consoles(w http.ResponseWriter, r *http.Request) {
|
|
ctx := route.Context(r)
|
|
name := route.Param(ctx, "filepath")
|
|
|
|
file, err := http.Dir(h.options.ConsoleTemplatesPath).Open(name)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusNotFound)
|
|
return
|
|
}
|
|
text, err := ioutil.ReadAll(file)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Provide URL parameters as a map for easy use. Advanced users may have need for
|
|
// parameters beyond the first, so provide RawParams.
|
|
rawParams, err := url.ParseQuery(r.URL.RawQuery)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusBadRequest)
|
|
return
|
|
}
|
|
params := map[string]string{}
|
|
for k, v := range rawParams {
|
|
params[k] = v[0]
|
|
}
|
|
data := struct {
|
|
RawParams url.Values
|
|
Params map[string]string
|
|
Path string
|
|
}{
|
|
RawParams: rawParams,
|
|
Params: params,
|
|
Path: strings.TrimLeft(name, "/"),
|
|
}
|
|
|
|
tmpl := template.NewTemplateExpander(h.context, string(text), "__console_"+name, data, h.now(), h.queryEngine, h.options.ExternalURL.Path)
|
|
filenames, err := filepath.Glob(h.options.ConsoleLibrariesPath + "/*.lib")
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
result, err := tmpl.ExpandHTML(filenames)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
io.WriteString(w, result)
|
|
}
|
|
|
|
func (h *Handler) graph(w http.ResponseWriter, r *http.Request) {
|
|
h.executeTemplate(w, "graph.html", nil)
|
|
}
|
|
|
|
func (h *Handler) status(w http.ResponseWriter, r *http.Request) {
|
|
h.executeTemplate(w, "status.html", struct {
|
|
Birth time.Time
|
|
CWD string
|
|
Version *PrometheusVersion
|
|
Alertmanagers []string
|
|
}{
|
|
Birth: h.birth,
|
|
CWD: h.cwd,
|
|
Version: h.versionInfo,
|
|
Alertmanagers: h.notifier.Alertmanagers(),
|
|
})
|
|
}
|
|
|
|
func (h *Handler) flags(w http.ResponseWriter, r *http.Request) {
|
|
h.executeTemplate(w, "flags.html", h.flagsMap)
|
|
}
|
|
|
|
func (h *Handler) config(w http.ResponseWriter, r *http.Request) {
|
|
h.mtx.RLock()
|
|
defer h.mtx.RUnlock()
|
|
|
|
h.executeTemplate(w, "config.html", h.configString)
|
|
}
|
|
|
|
func (h *Handler) rules(w http.ResponseWriter, r *http.Request) {
|
|
h.executeTemplate(w, "rules.html", h.ruleManager)
|
|
}
|
|
|
|
func (h *Handler) targets(w http.ResponseWriter, r *http.Request) {
|
|
// Bucket targets by job label
|
|
tps := map[string][]retrieval.Target{}
|
|
for _, t := range h.targetManager.Targets() {
|
|
job := string(t.Labels()[model.JobLabel])
|
|
tps[job] = append(tps[job], t)
|
|
}
|
|
|
|
h.executeTemplate(w, "targets.html", struct {
|
|
TargetPools map[string][]retrieval.Target
|
|
}{
|
|
TargetPools: tps,
|
|
})
|
|
}
|
|
|
|
func (h *Handler) version(w http.ResponseWriter, r *http.Request) {
|
|
dec := json.NewEncoder(w)
|
|
if err := dec.Encode(h.versionInfo); err != nil {
|
|
http.Error(w, fmt.Sprintf("error encoding JSON: %s", err), http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
func (h *Handler) quit(w http.ResponseWriter, r *http.Request) {
|
|
fmt.Fprintf(w, "Requesting termination... Goodbye!")
|
|
close(h.quitCh)
|
|
}
|
|
|
|
func (h *Handler) reload(w http.ResponseWriter, r *http.Request) {
|
|
rc := make(chan error)
|
|
h.reloadCh <- rc
|
|
if err := <-rc; err != nil {
|
|
http.Error(w, fmt.Sprintf("failed to reload config: %s", err), http.StatusInternalServerError)
|
|
}
|
|
}
|
|
|
|
func (h *Handler) consolesPath() string {
|
|
if _, err := os.Stat(h.options.ConsoleTemplatesPath + "/index.html"); !os.IsNotExist(err) {
|
|
return h.options.ExternalURL.Path + "/consoles/index.html"
|
|
}
|
|
if h.options.UserAssetsPath != "" {
|
|
if _, err := os.Stat(h.options.UserAssetsPath + "/index.html"); !os.IsNotExist(err) {
|
|
return h.options.ExternalURL.Path + "/user/index.html"
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func tmplFuncs(consolesPath string, opts *Options) template_text.FuncMap {
|
|
return template_text.FuncMap{
|
|
"since": func(t time.Time) time.Duration {
|
|
return time.Since(t) / time.Millisecond * time.Millisecond
|
|
},
|
|
"consolesPath": func() string { return consolesPath },
|
|
"pathPrefix": func() string { return opts.ExternalURL.Path },
|
|
"stripLabels": func(lset model.LabelSet, labels ...model.LabelName) model.LabelSet {
|
|
for _, ln := range labels {
|
|
delete(lset, ln)
|
|
}
|
|
return lset
|
|
},
|
|
"globalURL": func(u *url.URL) *url.URL {
|
|
host, port, err := net.SplitHostPort(u.Host)
|
|
if err != nil {
|
|
return u
|
|
}
|
|
for _, lhr := range localhostRepresentations {
|
|
if host == lhr {
|
|
_, ownPort, err := net.SplitHostPort(opts.ListenAddress)
|
|
if err != nil {
|
|
return u
|
|
}
|
|
|
|
if port == ownPort {
|
|
// Only in the case where the target is on localhost and its port is
|
|
// the same as the one we're listening on, we know for sure that
|
|
// we're monitoring our own process and that we need to change the
|
|
// scheme, hostname, and port to the externally reachable ones as
|
|
// well. We shouldn't need to touch the path at all, since if a
|
|
// path prefix is defined, the path under which we scrape ourselves
|
|
// should already contain the prefix.
|
|
u.Scheme = opts.ExternalURL.Scheme
|
|
u.Host = opts.ExternalURL.Host
|
|
} else {
|
|
// Otherwise, we only know that localhost is not reachable
|
|
// externally, so we replace only the hostname by the one in the
|
|
// external URL. It could be the wrong hostname for the service on
|
|
// this port, but it's still the best possible guess.
|
|
host, _, err := net.SplitHostPort(opts.ExternalURL.Host)
|
|
if err != nil {
|
|
return u
|
|
}
|
|
u.Host = host + ":" + port
|
|
}
|
|
break
|
|
}
|
|
}
|
|
return u
|
|
},
|
|
"healthToClass": func(th retrieval.TargetHealth) string {
|
|
switch th {
|
|
case retrieval.HealthUnknown:
|
|
return "warning"
|
|
case retrieval.HealthGood:
|
|
return "success"
|
|
default:
|
|
return "danger"
|
|
}
|
|
},
|
|
"alertStateToClass": func(as rules.AlertState) string {
|
|
switch as {
|
|
case rules.StateInactive:
|
|
return "success"
|
|
case rules.StatePending:
|
|
return "warning"
|
|
case rules.StateFiring:
|
|
return "danger"
|
|
default:
|
|
panic("unknown alert state")
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
func (h *Handler) getTemplate(name string) (string, error) {
|
|
baseTmpl, err := ui.Asset("web/ui/templates/_base.html")
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading base template: %s", err)
|
|
}
|
|
pageTmpl, err := ui.Asset(filepath.Join("web/ui/templates", name))
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading page template %s: %s", name, err)
|
|
}
|
|
return string(baseTmpl) + string(pageTmpl), nil
|
|
}
|
|
|
|
func (h *Handler) executeTemplate(w http.ResponseWriter, name string, data interface{}) {
|
|
text, err := h.getTemplate(name)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
}
|
|
|
|
tmpl := template.NewTemplateExpander(h.context, text, name, data, h.now(), h.queryEngine, h.options.ExternalURL.Path)
|
|
tmpl.Funcs(tmplFuncs(h.consolesPath(), h.options))
|
|
|
|
result, err := tmpl.ExpandHTML(nil)
|
|
if err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
return
|
|
}
|
|
io.WriteString(w, result)
|
|
}
|
|
|
|
func dumpHeap(w http.ResponseWriter, r *http.Request) {
|
|
target := fmt.Sprintf("/tmp/%d.heap", time.Now().Unix())
|
|
f, err := os.Create(target)
|
|
if err != nil {
|
|
log.Error("Could not dump heap: ", err)
|
|
}
|
|
fmt.Fprintf(w, "Writing to %s...", target)
|
|
defer f.Close()
|
|
pprof_runtime.WriteHeapProfile(f)
|
|
fmt.Fprintf(w, "Done")
|
|
}
|
|
|
|
// AlertStatus bundles alerting rules and the mapping of alert states to row classes.
|
|
type AlertStatus struct {
|
|
AlertingRules []*rules.AlertingRule
|
|
AlertStateToRowClass map[rules.AlertState]string
|
|
}
|
|
|
|
type byAlertStateAndNameSorter struct {
|
|
alerts []*rules.AlertingRule
|
|
}
|
|
|
|
func (s byAlertStateAndNameSorter) Len() int {
|
|
return len(s.alerts)
|
|
}
|
|
|
|
func (s byAlertStateAndNameSorter) Less(i, j int) bool {
|
|
return s.alerts[i].State() > s.alerts[j].State() ||
|
|
(s.alerts[i].State() == s.alerts[j].State() &&
|
|
s.alerts[i].Name() < s.alerts[j].Name())
|
|
}
|
|
|
|
func (s byAlertStateAndNameSorter) Swap(i, j int) {
|
|
s.alerts[i], s.alerts[j] = s.alerts[j], s.alerts[i]
|
|
}
|