ceph_exporter/ceph/rgw.go
2022-02-23 15:43:46 -08:00

220 lines
5.6 KiB
Go

package ceph
import (
"encoding/json"
"os/exec"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
const (
	// rgwGCTimeFormat is the time.Parse layout applied to the "time" field
	// of a GC task once any fractional-second suffix has been removed.
	rgwGCTimeFormat = "2006-01-02 15:04:05"

	// radosgwAdminPath is the absolute path of the radosgw-admin binary
	// executed to list GC tasks.
	radosgwAdminPath = "/usr/bin/radosgw-admin"

	// backgroundCollectInterval is the pause between two consecutive
	// collection runs of the background collection loop.
	backgroundCollectInterval = 5 * time.Minute
)

// RGW collection modes.
// NOTE(review): these are not referenced in this file; presumably the caller
// uses them to decide whether an RGWCollector is created and whether it runs
// in the background - confirm at the call site.
const (
	RGWModeDisabled   = 0
	RGWModeForeground = 1
	RGWModeBackground = 2
)
// rgwTaskGC is one element of the JSON array that collect decodes from the
// GC task listing. Time is kept as the raw string; use ExpiresAt to obtain a
// parsed timestamp.
type rgwTaskGC struct {
	Tag  string `json:"tag"`
	Time string `json:"time"`
	// Objects lists the RADOS objects covered by this task.
	Objects []struct {
		Pool string `json:"pool"`
		OID  string `json:"oid"`
		// Key is decoded from the "key" member of each object entry.
		Key      string `json:"key"`
		Instance string `json:"instance"`
	} `json:"objs"`
}
// ExpiresAt returns the timestamp at which this task expires and becomes
// active. Everything from the first "." in Time onward is discarded and the
// remainder is parsed with rgwGCTimeFormat. If parsing fails, the current
// time is returned instead of an error.
func (gc rgwTaskGC) ExpiresAt() time.Time {
	stamp := gc.Time
	if dot := strings.Index(stamp, "."); dot >= 0 {
		stamp = stamp[:dot]
	}

	expiry, err := time.Parse(rgwGCTimeFormat, stamp)
	if err == nil {
		return expiry
	}
	return time.Now()
}
// rgwGetGCTaskList runs `radosgw-admin -c <config> gc list --include-all`
// and returns the command's standard output. When the command fails, the
// output is discarded and only the error is returned.
func rgwGetGCTaskList(config string) ([]byte, error) {
	cmd := exec.Command(radosgwAdminPath, "-c", config, "gc", "list", "--include-all")

	stdout, err := cmd.Output()
	if err != nil {
		return nil, err
	}
	return stdout, nil
}
// RGWCollector gathers RGW garbage-collection metrics and exposes them as
// Prometheus gauges.
type RGWCollector struct {
	// config is handed to getRGWGCTaskList on every collection run.
	config string
	// background selects where collection happens: in the goroutine started
	// by NewRGWCollector (true) or inline in Collect (false).
	background bool
	logger     *logrus.Logger
	version    *Version

	// ActiveTasks is the number of RGW GC tasks whose timer has expired.
	ActiveTasks *prometheus.GaugeVec

	// ActiveObjects is the total number of objects held by active tasks.
	ActiveObjects *prometheus.GaugeVec

	// PendingTasks is the number of RGW GC tasks queued but not yet expired.
	PendingTasks *prometheus.GaugeVec

	// PendingObjects is the total number of objects held by pending tasks.
	PendingObjects *prometheus.GaugeVec

	// getRGWGCTaskList fetches the raw JSON task list for the given config.
	// NewRGWCollector sets it to rgwGetGCTaskList.
	getRGWGCTaskList func(string) ([]byte, error)
}
// NewRGWCollector builds an RGWCollector from the exporter's settings and
// creates the four RGW GC gauges, each carrying the exporter's cluster name
// as a constant "cluster" label. When background is true, a goroutine running
// backgroundCollect is started before the collector is returned.
func NewRGWCollector(exporter *Exporter, background bool) *RGWCollector {
	constLabels := prometheus.Labels{"cluster": exporter.Cluster}

	// The gauges differ only in their name and help text.
	newGauge := func(name, help string) *prometheus.GaugeVec {
		return prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Namespace:   cephNamespace,
				Name:        name,
				Help:        help,
				ConstLabels: constLabels,
			},
			[]string{},
		)
	}

	collector := &RGWCollector{
		config:           exporter.Config,
		background:       background,
		logger:           exporter.Logger,
		version:          exporter.Version,
		getRGWGCTaskList: rgwGetGCTaskList,

		ActiveTasks:    newGauge("rgw_gc_active_tasks", "RGW GC active task count"),
		ActiveObjects:  newGauge("rgw_gc_active_objects", "RGW GC active object count"),
		PendingTasks:   newGauge("rgw_gc_pending_tasks", "RGW GC pending task count"),
		PendingObjects: newGauge("rgw_gc_pending_objects", "RGW GC pending object count"),
	}

	if !collector.background {
		return collector
	}

	// Listing GC tasks can take a while when the backlog is large, so in
	// background mode the stats are gathered off the scrape path.
	go collector.backgroundCollect()
	return collector
}
// collectorList returns the gauges owned by this collector in a fixed order:
// active tasks, active objects, pending tasks, pending objects.
func (r *RGWCollector) collectorList() []prometheus.Collector {
	gauges := make([]prometheus.Collector, 0, 4)
	gauges = append(gauges,
		r.ActiveTasks,
		r.ActiveObjects,
		r.PendingTasks,
		r.PendingObjects,
	)
	return gauges
}
// backgroundCollect refreshes the RGW GC gauges in an endless loop, sleeping
// backgroundCollectInterval after every run. Collection errors are logged and
// the loop carries on; the function never returns.
func (r *RGWCollector) backgroundCollect() error {
	for {
		entry := r.logger.WithField("background", r.background)
		entry.Debug("collecting RGW GC stats")

		if err := r.collect(); err != nil {
			entry.WithError(err).Error("error collecting RGW GC stats")
		}

		time.Sleep(backgroundCollectInterval)
	}
}
// collect fetches the GC task list, decodes it and updates the four gauges.
// A task counts as active when its ExpiresAt time lies strictly before the
// moment collection started; every other task counts as pending. If fetching
// or decoding fails, the error is returned and the gauges are left untouched.
func (r *RGWCollector) collect() error {
	raw, err := r.getRGWGCTaskList(r.config)
	if err != nil {
		return err
	}

	var tasks []rgwTaskGC
	if err := json.Unmarshal(raw, &tasks); err != nil {
		return err
	}

	// Tally tasks and the objects they hold, split by expiry state.
	var active, pending struct{ tasks, objects int }
	now := time.Now()
	for i := range tasks {
		tally := &pending
		if now.Sub(tasks[i].ExpiresAt()) > 0 {
			// The timer has expired, so this task is active.
			tally = &active
		}
		tally.tasks++
		tally.objects += len(tasks[i].Objects)
	}

	r.ActiveTasks.WithLabelValues().Set(float64(active.tasks))
	r.PendingTasks.WithLabelValues().Set(float64(pending.tasks))
	r.ActiveObjects.WithLabelValues().Set(float64(active.objects))
	r.PendingObjects.WithLabelValues().Set(float64(pending.objects))
	return nil
}
// Describe forwards the descriptors of every gauge owned by the RGWCollector
// to the provided prometheus channel.
func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
	gauges := r.collectorList()
	for i := range gauges {
		gauges[i].Describe(ch)
	}
}
// Collect sends the current value of every RGW GC gauge to the provided
// prometheus channel. It requires the caller to handle synchronization.
//
// In foreground mode the gauges are refreshed inline first; a refresh error
// is logged and the existing gauge values are still sent. In background mode
// no refresh happens here.
func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) {
	if !r.background {
		entry := r.logger.WithField("background", r.background)
		entry.Debug("collecting RGW GC stats")

		if err := r.collect(); err != nil {
			entry.WithError(err).Error("error collecting RGW GC stats")
		}
	}

	gauges := r.collectorList()
	for i := range gauges {
		gauges[i].Collect(ch)
	}
}