pass version to collectors when calling Collect()

Alex Marangone 2023-02-14 11:10:54 -08:00
parent 69edc55596
commit ba15bf50a3
10 changed files with 90 additions and 70 deletions
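Summary: until this change, every collector resolved the Ceph version once in its constructor and cached it in a version field. This commit deletes that field and its constructor assignment from each collector, and instead has the exporter hand its current *Version to Collect() on every scrape, so version-gated metrics track a cluster that is upgraded while the exporter is running. The signature change, repeated across all the collectors (shown here for ClusterUsageCollector, taken directly from the diff below):

// Before: version cached on the struct at construction time.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric)

// After: the exporter passes its current *Version on each scrape.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version)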

View File

@@ -32,7 +32,6 @@ const (
type ClusterUsageCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// GlobalCapacity displays the total storage capacity of the cluster. This
// information is based on the actual no. of objects that are
@@ -57,7 +56,6 @@ func NewClusterUsageCollector(exporter *Exporter) *ClusterUsageCollector {
return &ClusterUsageCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace,
@@ -106,7 +104,6 @@ func (c *ClusterUsageCollector) collect() error {
return err
}
stats := &cephClusterStats{}
if err := json.Unmarshal(buf, stats); err != nil {
return err
@@ -143,7 +140,7 @@ func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends the metric values for each metric pertaining to the global
// cluster usage over to the provided prometheus Metric channel.
- func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster usage metrics")
if err := c.collect(); err != nil {
c.logger.WithError(err).Error("error collecting cluster usage metrics")

View File

@@ -33,7 +33,6 @@ var (
type CrashesCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
crashReportsDesc *prometheus.Desc
}
@@ -46,7 +45,6 @@ func NewCrashesCollector(exporter *Exporter) *CrashesCollector {
collector := &CrashesCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
crashReportsDesc: prometheus.NewDesc(
fmt.Sprintf("%s_crash_reports", cephNamespace),
@@ -106,7 +104,7 @@ func (c *CrashesCollector) Describe(ch chan<- *prometheus.Desc) {
}
// Collect sends all the collected metrics to Prometheus.
- func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
crashes, err := c.getCrashLs()
if err != nil {
c.logger.WithError(err).Error("failed to run 'ceph crash ls'")

View File

@@ -226,7 +226,26 @@ func (exporter *Exporter) Describe(ch chan<- *prometheus.Desc) {
}
for _, cc := range exporter.cc {
- cc.Describe(ch)
+ switch cc.(type) {
+ case *ClusterUsageCollector:
+ cc.(*ClusterUsageCollector).Describe(ch)
+ case *PoolUsageCollector:
+ cc.(*PoolUsageCollector).Describe(ch)
+ case *PoolInfoCollector:
+ cc.(*PoolInfoCollector).Describe(ch)
+ case *ClusterHealthCollector:
+ cc.(*ClusterHealthCollector).Describe(ch)
+ case *MonitorCollector:
+ cc.(*MonitorCollector).Describe(ch)
+ case *OSDCollector:
+ cc.(*OSDCollector).Describe(ch)
+ case *CrashesCollector:
+ cc.(*CrashesCollector).Describe(ch)
+ case *RbdMirrorStatusCollector:
+ cc.(*RbdMirrorStatusCollector).Describe(ch)
+ case *RGWCollector:
+ cc.(*RGWCollector).Describe(ch)
+ }
}
}
@@ -250,6 +269,25 @@ func (exporter *Exporter) Collect(ch chan<- prometheus.Metric) {
}
for _, cc := range exporter.cc {
- cc.Collect(ch)
+ switch cc.(type) {
+ case *ClusterUsageCollector:
+ cc.(*ClusterUsageCollector).Collect(ch, exporter.Version)
+ case *PoolUsageCollector:
+ cc.(*PoolUsageCollector).Collect(ch, exporter.Version)
+ case *PoolInfoCollector:
+ cc.(*PoolInfoCollector).Collect(ch, exporter.Version)
+ case *ClusterHealthCollector:
+ cc.(*ClusterHealthCollector).Collect(ch, exporter.Version)
+ case *MonitorCollector:
+ cc.(*MonitorCollector).Collect(ch, exporter.Version)
+ case *OSDCollector:
+ cc.(*OSDCollector).Collect(ch, exporter.Version)
+ case *CrashesCollector:
+ cc.(*CrashesCollector).Collect(ch, exporter.Version)
+ case *RbdMirrorStatusCollector:
+ cc.(*RbdMirrorStatusCollector).Collect(ch, exporter.Version)
+ case *RGWCollector:
+ cc.(*RGWCollector).Collect(ch, exporter.Version)
+ }
}
}
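Editor's note: both switches above call the same two methods on every arm; they exist because, with the extra parameter, the collectors no longer satisfy prometheus.Collector, so exporter.cc presumably now holds a less specific element type. A small interface would collapse each switch to a single call. A minimal sketch going beyond what the diff shows (the name versionedCollector is invented here):

// Hypothetical interface capturing the new collector contract; every
// collector touched by this commit already has exactly these two methods.
type versionedCollector interface {
	Describe(ch chan<- *prometheus.Desc)
	Collect(ch chan<- prometheus.Metric, version *Version)
}

// With exporter.cc declared as []versionedCollector, each loop body
// reduces to a direct call:
for _, cc := range exporter.cc {
	cc.Collect(ch, exporter.Version)
}

Even if the switch is kept, the idiomatic form switch v := cc.(type) would bind the concrete value once per arm and avoid the repeated type assertions.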

View File

@@ -49,7 +49,6 @@ var (
type ClusterHealthCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// healthChecksMap stores warnings and their criticality
healthChecksMap map[string]int
@@ -289,7 +288,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
collector := &ClusterHealthCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
healthChecksMap: map[string]int{
"AUTH_BAD_CAPS": 2,
@@ -558,13 +556,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
"notieragent": &collector.OSDMapFlagNoTierAgent,
}
- if exporter.Version.IsAtLeast(Pacific) {
- // pacific adds the DAEMON_OLD_VERSION health check
- // that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
- // we'll interpret this as a critical warning (2)
- collector.healthChecksMap["DAEMON_OLD_VERSION"] = 2
- }
return collector
}
@@ -724,7 +715,7 @@ type cephHealthStats struct {
} `json:"servicemap"`
}
- func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
+ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric, version *Version) error {
cmd := c.cephUsageCommand(jsonFormat)
buf, _, err := c.conn.MonCommand(cmd)
if err != nil {
@@ -883,6 +874,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
}
}
}
+ if version.IsAtLeast(Pacific) {
+ // pacific adds the DAEMON_OLD_VERSION health check
+ // that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
+ // we'll interpret this as a critical warning (2)
+ c.healthChecksMap["DAEMON_OLD_VERSION"] = 2
+ }
if !mapEmpty {
if val, present := c.healthChecksMap[k]; present {
c.HealthStatusInterpreter.Set(float64(val))
@@ -991,7 +990,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.CachePromoteIOOps, prometheus.GaugeValue, stats.PGMap.CachePromoteOpPerSec)
var actualOsdMap osdMap
- if c.version.IsAtLeast(Octopus) {
+ if version.IsAtLeast(Octopus) {
if stats.OSDMap != nil {
actualOsdMap = osdMap{
NumOSDs: stats.OSDMap["num_osds"].(float64),
@@ -1031,7 +1030,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
activeMgr := 0
standByMgrs := 0
- if c.version.IsAtLeast(Octopus) {
+ if version.IsAtLeast(Octopus) {
if stats.MgrMap.Available {
activeMgr = 1
}
@@ -1334,9 +1333,9 @@ func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization.
- func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster health metrics")
- if err := c.collect(ch); err != nil {
+ if err := c.collect(ch, version); err != nil {
c.logger.WithError(err).Error("error collecting cluster health metrics " + err.Error())
}
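The Pacific block above moves from the constructor into collect(), so the DAEMON_OLD_VERSION entry is now registered against the version passed in at scrape time rather than the one seen at startup, and the two Octopus checks switch from the cached c.version to the same parameter. The Version type and its named releases are not part of this diff; a hypothetical sketch consistent with how they are used here (Ceph Octopus is the 15.x series, Pacific is 16.x):

// Hypothetical sketch; the repo's actual Version type may differ.
type Version struct {
	Major, Minor, Patch int
}

// Named releases referenced in the diff.
var (
	Octopus = &Version{Major: 15}
	Pacific = &Version{Major: 16}
)

// IsAtLeast reports whether v is the given release or newer.
func (v *Version) IsAtLeast(other *Version) bool {
	if v.Major != other.Major {
		return v.Major > other.Major
	}
	if v.Minor != other.Minor {
		return v.Minor > other.Minor
	}
	return v.Patch >= other.Patch
}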

View File

@@ -34,7 +34,6 @@ var versionRegexp = regexp.MustCompile(`ceph version (?P<version_tag>\d+\.\d+\.\
type MonitorCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// TotalKBs display the total storage a given monitor node has.
TotalKBs *prometheus.GaugeVec
@@ -98,7 +97,6 @@ func NewMonitorCollector(exporter *Exporter) *MonitorCollector {
return &MonitorCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
TotalKBs: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@@ -553,7 +551,7 @@ func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the given metrics from the Monitors and sends them to the prometheus
// channel.
- func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric) {
+ func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
m.logger.Debug("collecting ceph monitor metrics")
if err := m.collect(); err != nil {
m.logger.WithError(err).Error("error collecting ceph monitor metrics")

View File

@@ -42,7 +42,6 @@ const (
type OSDCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// osdScrubCache holds the cache of previous PG scrubs
osdScrubCache map[int]int
@@ -152,9 +151,6 @@ type OSDCollector struct {
OldestInactivePG prometheus.Gauge
}
- // This ensures OSDCollector implements interface prometheus.Collector.
- var _ prometheus.Collector = &OSDCollector{}
// NewOSDCollector creates an instance of the OSDCollector and instantiates the
// individual metrics that show information about the OSD.
func NewOSDCollector(exporter *Exporter) *OSDCollector {
@@ -165,7 +161,6 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
return &OSDCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
osdScrubCache: make(map[int]int),
osdLabelsCache: make(map[int64]*cephOSDLabel),
@@ -1119,7 +1114,7 @@ func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided Prometheus channel.
// It requires the caller to handle synchronization.
- func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) {
+ func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
// Reset daemon specific metrics; daemons can leave the cluster
o.CrushWeight.Reset()
o.Depth.Reset()
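The deleted var _ prometheus.Collector = &OSDCollector{} line above is Go's standard compile-time interface check; it has to go because the new Collect signature no longer matches prometheus.Collector. If an interface along the lines of the versionedCollector sketched earlier were introduced, the same guard could be restated against it:

// Compile-time assertion against the hypothetical versionedCollector
// interface; the build fails if a collector's signatures drift.
var _ versionedCollector = &OSDCollector{}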

View File

@@ -34,7 +34,6 @@ const (
type PoolInfoCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// PGNum contains the count of PGs allotted to a particular pool.
PGNum *prometheus.GaugeVec
@@ -77,7 +76,6 @@ func NewPoolInfoCollector(exporter *Exporter) *PoolInfoCollector {
return &PoolInfoCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
PGNum: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@@ -261,7 +259,7 @@ func (p *PoolInfoCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the
// prometheus channel.
- func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric) {
+ func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool metrics")
if err := p.collect(); err != nil {
p.logger.WithError(err).Error("error collecting pool metrics")

View File

@@ -27,7 +27,6 @@ import (
type PoolUsageCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// UsedBytes tracks the amount of bytes currently allocated for the pool. This
// does not factor in the overcommitment made for individual images.
@@ -82,7 +81,6 @@ func NewPoolUsageCollector(exporter *Exporter) *PoolUsageCollector {
return &PoolUsageCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use",
poolLabel, labels,
@@ -213,7 +211,7 @@ func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the
// prometheus channel.
- func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) {
+ func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool usage metrics")
if err := p.collect(ch); err != nil {
p.logger.WithError(err).Error("error collecting pool usage metrics")

View File

@@ -155,7 +155,7 @@ func (c *RbdMirrorStatusCollector) Describe(ch chan<- *prometheus.Desc) {
}
// Collect sends all the collected metrics to Prometheus.
- func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
status, err := rbdMirrorStatus(c.config, c.user)
if err != nil {
c.logger.WithError(err).Error("failed to run 'rbd mirror pool status'")
@@ -166,6 +166,7 @@ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
}
c.RbdMirrorStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.Health))
+ c.version = version
if c.version.IsAtLeast(Pacific) {
c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))
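Note that RbdMirrorStatusCollector is the one collector that keeps its version field: instead of deleting it, the commit refreshes it from the argument (c.version = version), so any other method still reading c.version sees the value from the latest scrape. A hypothetical alternative matching the rest of the commit would be to thread the parameter through directly (the helper and the rbdMirrorPoolStatus type name are assumptions here, not part of the diff):

// Hypothetical: drop the cached field and pass the version down instead.
func (c *RbdMirrorStatusCollector) collectDaemonHealth(version *Version, rbdStatus rbdMirrorPoolStatus) {
	if version.IsAtLeast(Pacific) {
		c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))
	}
}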

View File

@@ -76,7 +76,6 @@ type RGWCollector struct {
user string
background bool
logger *logrus.Logger
- version *Version
// ActiveTasks reports the number of (expired) RGW GC tasks
ActiveTasks *prometheus.GaugeVec
@@ -101,7 +100,6 @@ func NewRGWCollector(exporter *Exporter, background bool) *RGWCollector {
config: exporter.Config,
background: background,
logger: exporter.Logger,
- version: exporter.Version,
getRGWGCTaskList: rgwGetGCTaskList,
ActiveTasks: prometheus.NewGaugeVec(
@@ -219,7 +217,7 @@ func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization.
- func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) {
+ func (r *RGWCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
if !r.background {
r.logger.WithField("background", r.background).Debug("collecting RGW GC stats")
err := r.collect()
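Editor's note: the Exporter's own Describe(ch chan<- *prometheus.Desc) and Collect(ch chan<- prometheus.Metric) signatures are untouched by this commit, so the Exporter itself still satisfies prometheus.Collector and existing registration code keeps working; only the inner collectors diverge from the standard interface. A minimal usage sketch (how the exporter value is constructed is not shown in this diff and is assumed):

// Registration is unaffected: the exporter still implements
// prometheus.Collector even though its child collectors do not.
reg := prometheus.NewRegistry()
reg.MustRegister(exporter)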