2016-01-06 18:24:20 +00:00
|
|
|
// Copyright 2016 DigitalOcean
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2016-01-10 20:35:53 +00:00
|
|
|
// Command ceph_exporter provides a Prometheus exporter for a Ceph cluster.
|
2016-01-06 18:24:20 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"flag"
|
|
|
|
"log"
|
2017-11-23 09:24:31 +00:00
|
|
|
"net"
|
2016-01-06 18:24:20 +00:00
|
|
|
"net/http"
|
2017-11-23 09:24:31 +00:00
|
|
|
"os"
|
2016-01-06 18:24:20 +00:00
|
|
|
"sync"
|
2017-11-23 09:24:31 +00:00
|
|
|
"syscall"
|
|
|
|
"time"
|
2016-01-06 18:24:20 +00:00
|
|
|
|
|
|
|
"github.com/ceph/go-ceph/rados"
|
2017-09-14 11:39:52 +00:00
|
|
|
"github.com/digitalocean/ceph_exporter/collectors"
|
2016-01-06 18:24:20 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2017-09-14 11:39:52 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
2016-01-06 18:24:20 +00:00
|
|
|
)
|
|
|
|
|
2018-08-01 13:37:07 +00:00
|
|
|
// Defaults used when no exporter config file is present (single-cluster mode).
const (
	// defaultCephClusterLabel is the cluster label attached to all metrics
	// when no per-cluster config is supplied.
	defaultCephClusterLabel = "ceph"
	// defaultCephConfigPath is the conventional location of the ceph
	// configuration file.
	defaultCephConfigPath = "/etc/ceph/ceph.conf"
)
|
|
|
|
|
2017-11-23 09:24:31 +00:00
|
|
|
// This horrible thing is a copy of tcpKeepAliveListener, tweaked to
// specifically check if it hits EMFILE when doing an accept, and if so,
// terminate the process.

// keepAlive is the TCP keep-alive period applied to every accepted
// connection (the same 3-minute period used by the net/http listener this
// type was copied from).
const keepAlive time.Duration = 3 * time.Minute

// emfileAwareTcpListener wraps a *net.TCPListener so Accept can inspect
// errors for EMFILE (process out of file descriptors) and abort instead of
// spinning forever on a permanently failing accept loop.
type emfileAwareTcpListener struct {
	*net.TCPListener
}
|
|
|
|
|
|
|
|
func (ln emfileAwareTcpListener) Accept() (c net.Conn, err error) {
|
|
|
|
tc, err := ln.AcceptTCP()
|
|
|
|
if err != nil {
|
|
|
|
if oerr, ok := err.(*net.OpError); ok {
|
|
|
|
if serr, ok := oerr.Err.(*os.SyscallError); ok && serr.Err == syscall.EMFILE {
|
|
|
|
// This calls os.Exit(1) and terminates the process
|
|
|
|
log.Fatalf("%v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Default return
|
|
|
|
return
|
|
|
|
}
|
|
|
|
tc.SetKeepAlive(true)
|
2018-07-09 09:08:14 +00:00
|
|
|
tc.SetKeepAlivePeriod(keepAlive)
|
2017-11-23 09:24:31 +00:00
|
|
|
return tc, nil
|
|
|
|
}
|
|
|
|
|
2016-01-06 18:24:20 +00:00
|
|
|
// CephExporter wraps all the ceph collectors and provides a single global
// exporter to extracts metrics out of. It also ensures that the collection
// is done in a thread-safe manner, the necessary requirement stated by
// prometheus. It also implements a prometheus.Collector interface in order
// to register it correctly.
type CephExporter struct {
	// mu serializes Collect calls; prometheus may invoke Collect from
	// several goroutines concurrently.
	mu sync.Mutex
	// collectors is the set of per-subsystem collectors this exporter
	// fans metric collection out to.
	collectors []prometheus.Collector
}

// Verify that the exporter implements the interface correctly.
var _ prometheus.Collector = &CephExporter{}
|
|
|
|
|
|
|
|
// NewCephExporter creates an instance to CephExporter and returns a reference
|
|
|
|
// to it. We can choose to enable a collector to extract stats out of by adding
|
|
|
|
// it to the list of collectors.
|
2018-08-01 13:37:07 +00:00
|
|
|
func NewCephExporter(conn *rados.Conn, cluster string, config string, withRGW bool) *CephExporter {
|
|
|
|
c := &CephExporter{
|
2016-01-06 18:24:20 +00:00
|
|
|
collectors: []prometheus.Collector{
|
2017-03-23 20:20:25 +00:00
|
|
|
collectors.NewClusterUsageCollector(conn, cluster),
|
|
|
|
collectors.NewPoolUsageCollector(conn, cluster),
|
|
|
|
collectors.NewClusterHealthCollector(conn, cluster),
|
|
|
|
collectors.NewMonitorCollector(conn, cluster),
|
|
|
|
collectors.NewOSDCollector(conn, cluster),
|
2016-01-06 18:24:20 +00:00
|
|
|
},
|
|
|
|
}
|
2018-08-01 13:37:07 +00:00
|
|
|
|
|
|
|
if withRGW {
|
|
|
|
c.collectors = append(c.collectors,
|
|
|
|
collectors.NewRGWCollector(cluster, config),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
return c
|
2016-01-06 18:24:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Describe sends all the descriptors of the collectors included to
|
|
|
|
// the provided channel.
|
|
|
|
func (c *CephExporter) Describe(ch chan<- *prometheus.Desc) {
|
|
|
|
for _, cc := range c.collectors {
|
|
|
|
cc.Describe(ch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect sends the collected metrics from each of the collectors to
|
|
|
|
// prometheus. Collect could be called several times concurrently
|
|
|
|
// and thus its run is protected by a single mutex.
|
|
|
|
func (c *CephExporter) Collect(ch chan<- prometheus.Metric) {
|
|
|
|
c.mu.Lock()
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
|
|
|
for _, cc := range c.collectors {
|
|
|
|
cc.Collect(ch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
var (
|
2016-01-29 14:42:20 +00:00
|
|
|
addr = flag.String("telemetry.addr", ":9128", "host:port for ceph exporter")
|
2016-01-06 18:24:20 +00:00
|
|
|
metricsPath = flag.String("telemetry.path", "/metrics", "URL path for surfacing collected metrics")
|
2017-03-23 20:20:25 +00:00
|
|
|
cephConfig = flag.String("ceph.config", "", "path to ceph config file")
|
|
|
|
cephUser = flag.String("ceph.user", "admin", "Ceph user to connect to cluster.")
|
2016-01-06 18:24:20 +00:00
|
|
|
|
2018-08-01 13:37:07 +00:00
|
|
|
withRGW = flag.Bool("with-rgw", false, "Enable collection of stats from RGW")
|
|
|
|
|
2017-03-23 20:20:25 +00:00
|
|
|
exporterConfig = flag.String("exporter.config", "/etc/ceph/exporter.yml", "Path to ceph exporter config.")
|
2016-01-06 18:24:20 +00:00
|
|
|
)
|
|
|
|
flag.Parse()
|
|
|
|
|
2017-03-23 20:20:25 +00:00
|
|
|
if fileExists(*exporterConfig) {
|
|
|
|
|
|
|
|
cfg, err := ParseConfig(*exporterConfig)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("Error: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, cluster := range cfg.Cluster {
|
|
|
|
|
|
|
|
conn, err := rados.NewConnWithUser(cluster.User)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("cannot create new ceph connection: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
err = conn.ReadConfigFile(cluster.ConfigFile)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("cannot read ceph config file: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := conn.Connect(); err != nil {
|
|
|
|
log.Fatalf("cannot connect to ceph cluster: %s", err)
|
|
|
|
}
|
|
|
|
// defer Shutdown to program exit
|
|
|
|
defer conn.Shutdown()
|
|
|
|
|
|
|
|
log.Printf("Starting ceph exporter for cluster: %s", cluster.ClusterLabel)
|
2018-08-01 13:37:07 +00:00
|
|
|
err = prometheus.Register(NewCephExporter(conn, cluster.ClusterLabel, cluster.ConfigFile, *withRGW))
|
2017-03-23 20:20:25 +00:00
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("cannot export cluster: %s error: %v", cluster.ClusterLabel, err)
|
|
|
|
}
|
|
|
|
}
|
2016-01-06 18:24:20 +00:00
|
|
|
} else {
|
2017-03-23 20:20:25 +00:00
|
|
|
conn, err := rados.NewConnWithUser(*cephUser)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("cannot create new ceph connection: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if *cephConfig != "" {
|
|
|
|
err = conn.ReadConfigFile(*cephConfig)
|
|
|
|
} else {
|
|
|
|
err = conn.ReadDefaultConfigFile()
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("cannot read ceph config file: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := conn.Connect(); err != nil {
|
|
|
|
log.Fatalf("cannot connect to ceph cluster: %s", err)
|
|
|
|
}
|
|
|
|
defer conn.Shutdown()
|
|
|
|
|
2018-08-01 13:37:07 +00:00
|
|
|
prometheus.MustRegister(NewCephExporter(conn, defaultCephClusterLabel, defaultCephConfigPath, *withRGW))
|
2016-01-06 18:24:20 +00:00
|
|
|
}
|
|
|
|
|
2017-09-14 11:39:52 +00:00
|
|
|
http.Handle(*metricsPath, promhttp.Handler())
|
2016-01-06 18:24:20 +00:00
|
|
|
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
2016-12-29 22:08:20 +00:00
|
|
|
w.Write([]byte(`<html>
|
|
|
|
<head><title>Ceph Exporter</title></head>
|
|
|
|
<body>
|
|
|
|
<h1>Ceph Exporter</h1>
|
|
|
|
<p><a href='` + *metricsPath + `'>Metrics</a></p>
|
|
|
|
</body>
|
|
|
|
</html>`))
|
2016-01-06 18:24:20 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
log.Printf("Starting ceph exporter on %q", *addr)
|
2017-11-23 09:24:31 +00:00
|
|
|
// Below is essentially http.ListenAndServe(), but using our custom
|
|
|
|
// emfileAwareTcpListener that will die if we run out of file descriptors
|
|
|
|
ln, err := net.Listen("tcp", *addr)
|
|
|
|
if err == nil {
|
|
|
|
err := http.Serve(emfileAwareTcpListener{ln.(*net.TCPListener)}, nil)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("unable to serve requests: %s", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("unable to create listener: %s", err)
|
2016-01-06 18:24:20 +00:00
|
|
|
}
|
|
|
|
}
|