mirror of
https://github.com/digitalocean/ceph_exporter
synced 2025-02-27 08:40:27 +00:00
Ssobolewski/run rgw stats in background (#97)
* RGW GC stat collection can take a long time if there is a very large backlog
* Use a const for background interval
* Minor change per code review
This commit is contained in:
parent
dc6ab9c636
commit
415d296c31
@ -12,6 +12,13 @@ import (
|
||||
|
||||
const (
	// rgwGCTimeFormat is a Go reference-time layout (date and time to the
	// second). Presumably it is used to parse timestamps in the RGW GC task
	// list; the call site is not visible here.
	rgwGCTimeFormat = "2006-01-02 15:04:05"

	// radosgwAdminPath is the absolute path of the radosgw-admin executable.
	radosgwAdminPath = "/usr/bin/radosgw-admin"

	// backgroundCollectInterval is how long the background collection loop
	// sleeps between two collection attempts.
	backgroundCollectInterval = 5 * time.Minute
)
|
||||
|
||||
// RGW collection modes. The numeric values are what callers pass in as the
// integer RGW mode, so the order of this list must not change.
const (
	// RGWModeDisabled (0) means no RGW collector is used.
	RGWModeDisabled = iota
	// RGWModeForeground (1) means RGW stats are gathered as part of each
	// metrics collection.
	RGWModeForeground
	// RGWModeBackground (2) means RGW stats are gathered periodically by a
	// background loop rather than during metrics collection.
	RGWModeBackground
)
|
||||
|
||||
type rgwTaskGC struct {
|
||||
Tag string `json:"tag"`
|
||||
@ -51,7 +58,8 @@ func rgwGetGCTaskList(config string) ([]byte, error) {
|
||||
|
||||
// RGWCollector collects metrics from the RGW service
|
||||
type RGWCollector struct {
|
||||
config string
|
||||
config string
|
||||
background bool
|
||||
|
||||
// ActiveTasks reports the number of (expired) RGW GC tasks
|
||||
ActiveTasks *prometheus.GaugeVec
|
||||
@ -68,11 +76,12 @@ type RGWCollector struct {
|
||||
|
||||
// NewRGWCollector creates an instance of the RGWCollector and instantiates
|
||||
// the individual metrics that we can collect from the RGW service
|
||||
func NewRGWCollector(cluster string, config string) *RGWCollector {
|
||||
func NewRGWCollector(cluster string, config string, background bool) *RGWCollector {
|
||||
labels := make(prometheus.Labels)
|
||||
labels["cluster"] = cluster
|
||||
return &RGWCollector{
|
||||
rgw := &RGWCollector{
|
||||
config: config,
|
||||
background: background,
|
||||
getRGWGCTaskList: rgwGetGCTaskList,
|
||||
|
||||
ActiveTasks: prometheus.NewGaugeVec(
|
||||
@ -112,6 +121,14 @@ func NewRGWCollector(cluster string, config string) *RGWCollector {
|
||||
[]string{},
|
||||
),
|
||||
}
|
||||
|
||||
if rgw.background {
|
||||
// rgw stats need to be collected in the background as this can take a while
|
||||
// if we have a large backlog
|
||||
go rgw.backgroundCollect()
|
||||
}
|
||||
|
||||
return rgw
|
||||
}
|
||||
|
||||
func (r *RGWCollector) collectorList() []prometheus.Collector {
|
||||
@ -123,6 +140,16 @@ func (r *RGWCollector) collectorList() []prometheus.Collector {
|
||||
}
|
||||
}
|
||||
|
||||
func (r *RGWCollector) backgroundCollect() error {
|
||||
for {
|
||||
err := r.collect()
|
||||
if err != nil {
|
||||
log.Println("Failed to collect RGW GC stats", err)
|
||||
}
|
||||
time.Sleep(backgroundCollectInterval)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *RGWCollector) collect() error {
|
||||
data, err := r.getRGWGCTaskList(r.config)
|
||||
if err != nil {
|
||||
@ -172,9 +199,11 @@ func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
// Collect sends all the collected metrics to the provided prometheus channel.
|
||||
// It requires the caller to handle synchronization.
|
||||
func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
err := r.collect()
|
||||
if err != nil {
|
||||
log.Println("Failed to collect RGW GC stats", err)
|
||||
if !r.background {
|
||||
err := r.collect()
|
||||
if err != nil {
|
||||
log.Println("Failed to collect RGW GC stats", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, metric := range r.collectorList() {
|
||||
|
@ -114,7 +114,7 @@ func TestRGWCollector(t *testing.T) {
|
||||
},
|
||||
} {
|
||||
func() {
|
||||
collector := NewRGWCollector("ceph", "")
|
||||
collector := NewRGWCollector("ceph", "", false) // run in foreground for testing
|
||||
collector.getRGWGCTaskList = func(cluster string) ([]byte, error) {
|
||||
if tt.input != nil {
|
||||
return tt.input, nil
|
||||
|
24
exporter.go
24
exporter.go
@ -79,7 +79,7 @@ var _ prometheus.Collector = &CephExporter{}
|
||||
// NewCephExporter creates an instance to CephExporter and returns a reference
|
||||
// to it. We can choose to enable a collector to extract stats out of by adding
|
||||
// it to the list of collectors.
|
||||
func NewCephExporter(conn *rados.Conn, cluster string, config string, withRGW bool) *CephExporter {
|
||||
func NewCephExporter(conn *rados.Conn, cluster string, config string, rgwMode int) *CephExporter {
|
||||
c := &CephExporter{
|
||||
collectors: []prometheus.Collector{
|
||||
collectors.NewClusterUsageCollector(conn, cluster),
|
||||
@ -90,10 +90,22 @@ func NewCephExporter(conn *rados.Conn, cluster string, config string, withRGW bo
|
||||
},
|
||||
}
|
||||
|
||||
if withRGW {
|
||||
switch rgwMode {
|
||||
case collectors.RGWModeForeground:
|
||||
c.collectors = append(c.collectors,
|
||||
collectors.NewRGWCollector(cluster, config),
|
||||
collectors.NewRGWCollector(cluster, config, false),
|
||||
)
|
||||
|
||||
case collectors.RGWModeBackground:
|
||||
c.collectors = append(c.collectors,
|
||||
collectors.NewRGWCollector(cluster, config, true),
|
||||
)
|
||||
|
||||
case collectors.RGWModeDisabled:
|
||||
// nothing to do
|
||||
|
||||
default:
|
||||
log.Printf("RGW Collector Disabled do to invalid mode (%d)\n", rgwMode)
|
||||
}
|
||||
|
||||
return c
|
||||
@ -126,7 +138,7 @@ func main() {
|
||||
cephConfig = flag.String("ceph.config", "", "path to ceph config file")
|
||||
cephUser = flag.String("ceph.user", "admin", "Ceph user to connect to cluster.")
|
||||
|
||||
withRGW = flag.Bool("with-rgw", false, "Enable collection of stats from RGW")
|
||||
rgwMode = flag.Int("rgw.mode", 0, "Enable collection of stats from RGW (0:disabled 1:enabled 2:background)")
|
||||
|
||||
exporterConfig = flag.String("exporter.config", "/etc/ceph/exporter.yml", "Path to ceph exporter config.")
|
||||
)
|
||||
@ -158,7 +170,7 @@ func main() {
|
||||
defer conn.Shutdown()
|
||||
|
||||
log.Printf("Starting ceph exporter for cluster: %s", cluster.ClusterLabel)
|
||||
err = prometheus.Register(NewCephExporter(conn, cluster.ClusterLabel, cluster.ConfigFile, *withRGW))
|
||||
err = prometheus.Register(NewCephExporter(conn, cluster.ClusterLabel, cluster.ConfigFile, *rgwMode))
|
||||
if err != nil {
|
||||
log.Fatalf("cannot export cluster: %s error: %v", cluster.ClusterLabel, err)
|
||||
}
|
||||
@ -183,7 +195,7 @@ func main() {
|
||||
}
|
||||
defer conn.Shutdown()
|
||||
|
||||
prometheus.MustRegister(NewCephExporter(conn, defaultCephClusterLabel, defaultCephConfigPath, *withRGW))
|
||||
prometheus.MustRegister(NewCephExporter(conn, defaultCephClusterLabel, defaultCephConfigPath, *rgwMode))
|
||||
}
|
||||
|
||||
http.Handle(*metricsPath, promhttp.Handler())
|
||||
|
Loading…
Reference in New Issue
Block a user