pass version to collectors when calling Collect()

This commit is contained in:
Alex Marangone 2023-02-14 11:10:54 -08:00
parent 69edc55596
commit ba15bf50a3
10 changed files with 90 additions and 70 deletions

View File

@ -30,9 +30,8 @@ const (
// is growing or shrinking as a whole in order to zero in on the cause. The // is growing or shrinking as a whole in order to zero in on the cause. The
// pool specific stats are provided separately. // pool specific stats are provided separately.
type ClusterUsageCollector struct { type ClusterUsageCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// GlobalCapacity displays the total storage capacity of the cluster. This // GlobalCapacity displays the total storage capacity of the cluster. This
// information is based on the actual no. of objects that are // information is based on the actual no. of objects that are
@ -55,9 +54,8 @@ func NewClusterUsageCollector(exporter *Exporter) *ClusterUsageCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
return &ClusterUsageCollector{ return &ClusterUsageCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{ GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace, Namespace: cephNamespace,
@ -106,7 +104,6 @@ func (c *ClusterUsageCollector) collect() error {
return err return err
} }
stats := &cephClusterStats{} stats := &cephClusterStats{}
if err := json.Unmarshal(buf, stats); err != nil { if err := json.Unmarshal(buf, stats); err != nil {
return err return err
@ -143,7 +140,7 @@ func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends the metric values for each metric pertaining to the global // Collect sends the metric values for each metric pertaining to the global
// cluster usage over to the provided prometheus Metric channel. // cluster usage over to the provided prometheus Metric channel.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) { func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster usage metrics") c.logger.Debug("collecting cluster usage metrics")
if err := c.collect(); err != nil { if err := c.collect(); err != nil {
c.logger.WithError(err).Error("error collecting cluster usage metrics") c.logger.WithError(err).Error("error collecting cluster usage metrics")

View File

@ -31,9 +31,8 @@ var (
// This is NOT the same as new_crash_reports, that only counts new reports in the past // This is NOT the same as new_crash_reports, that only counts new reports in the past
// two weeks as reported by 'ceph health'. // two weeks as reported by 'ceph health'.
type CrashesCollector struct { type CrashesCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
crashReportsDesc *prometheus.Desc crashReportsDesc *prometheus.Desc
} }
@ -44,9 +43,8 @@ func NewCrashesCollector(exporter *Exporter) *CrashesCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
collector := &CrashesCollector{ collector := &CrashesCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
crashReportsDesc: prometheus.NewDesc( crashReportsDesc: prometheus.NewDesc(
fmt.Sprintf("%s_crash_reports", cephNamespace), fmt.Sprintf("%s_crash_reports", cephNamespace),
@ -106,7 +104,7 @@ func (c *CrashesCollector) Describe(ch chan<- *prometheus.Desc) {
} }
// Collect sends all the collected metrics Prometheus. // Collect sends all the collected metrics Prometheus.
func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric) { func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
crashes, err := c.getCrashLs() crashes, err := c.getCrashLs()
if err != nil { if err != nil {
c.logger.WithError(err).Error("failed to run 'ceph crash ls'") c.logger.WithError(err).Error("failed to run 'ceph crash ls'")

View File

@ -226,7 +226,26 @@ func (exporter *Exporter) Describe(ch chan<- *prometheus.Desc) {
} }
for _, cc := range exporter.cc { for _, cc := range exporter.cc {
cc.Describe(ch) switch cc.(type) {
case *ClusterUsageCollector:
cc.(*ClusterUsageCollector).Describe(ch)
case *PoolUsageCollector:
cc.(*PoolUsageCollector).Describe(ch)
case *PoolInfoCollector:
cc.(*PoolInfoCollector).Describe(ch)
case *ClusterHealthCollector:
cc.(*ClusterHealthCollector).Describe(ch)
case *MonitorCollector:
cc.(*MonitorCollector).Describe(ch)
case *OSDCollector:
cc.(*OSDCollector).Describe(ch)
case *CrashesCollector:
cc.(*CrashesCollector).Describe(ch)
case *RbdMirrorStatusCollector:
cc.(*RbdMirrorStatusCollector).Describe(ch)
case *RGWCollector:
cc.(*RGWCollector).Describe(ch)
}
} }
} }
@ -250,6 +269,25 @@ func (exporter *Exporter) Collect(ch chan<- prometheus.Metric) {
} }
for _, cc := range exporter.cc { for _, cc := range exporter.cc {
cc.Collect(ch) switch cc.(type) {
case *ClusterUsageCollector:
cc.(*ClusterUsageCollector).Collect(ch, exporter.Version)
case *PoolUsageCollector:
cc.(*PoolUsageCollector).Collect(ch, exporter.Version)
case *PoolInfoCollector:
cc.(*PoolInfoCollector).Collect(ch, exporter.Version)
case *ClusterHealthCollector:
cc.(*ClusterHealthCollector).Collect(ch, exporter.Version)
case *MonitorCollector:
cc.(*MonitorCollector).Collect(ch, exporter.Version)
case *OSDCollector:
cc.(*OSDCollector).Collect(ch, exporter.Version)
case *CrashesCollector:
cc.(*CrashesCollector).Collect(ch, exporter.Version)
case *RbdMirrorStatusCollector:
cc.(*RbdMirrorStatusCollector).Collect(ch, exporter.Version)
case *RGWCollector:
cc.(*RGWCollector).Collect(ch, exporter.Version)
}
} }
} }

View File

@ -47,9 +47,8 @@ var (
// It surfaces changes in the ceph parameters unlike data usage that ClusterUsageCollector // It surfaces changes in the ceph parameters unlike data usage that ClusterUsageCollector
// does. // does.
type ClusterHealthCollector struct { type ClusterHealthCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// healthChecksMap stores warnings and their criticality // healthChecksMap stores warnings and their criticality
healthChecksMap map[string]int healthChecksMap map[string]int
@ -287,9 +286,8 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
collector := &ClusterHealthCollector{ collector := &ClusterHealthCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
healthChecksMap: map[string]int{ healthChecksMap: map[string]int{
"AUTH_BAD_CAPS": 2, "AUTH_BAD_CAPS": 2,
@ -558,13 +556,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
"notieragent": &collector.OSDMapFlagNoTierAgent, "notieragent": &collector.OSDMapFlagNoTierAgent,
} }
if exporter.Version.IsAtLeast(Pacific) {
// pacific adds the DAEMON_OLD_VERSION health check
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
// we'll interpret this as a critical warning (2)
collector.healthChecksMap["DAEMON_OLD_VERSION"] = 2
}
return collector return collector
} }
@ -724,7 +715,7 @@ type cephHealthStats struct {
} `json:"servicemap"` } `json:"servicemap"`
} }
func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error { func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric, version *Version) error {
cmd := c.cephUsageCommand(jsonFormat) cmd := c.cephUsageCommand(jsonFormat)
buf, _, err := c.conn.MonCommand(cmd) buf, _, err := c.conn.MonCommand(cmd)
if err != nil { if err != nil {
@ -883,6 +874,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
} }
} }
} }
if version.IsAtLeast(Pacific) {
// pacific adds the DAEMON_OLD_VERSION health check
// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
// we'll interpret this as a critical warning (2)
c.healthChecksMap["DAEMON_OLD_VERSION"] = 2
}
if !mapEmpty { if !mapEmpty {
if val, present := c.healthChecksMap[k]; present { if val, present := c.healthChecksMap[k]; present {
c.HealthStatusInterpreter.Set(float64(val)) c.HealthStatusInterpreter.Set(float64(val))
@ -991,7 +990,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.CachePromoteIOOps, prometheus.GaugeValue, stats.PGMap.CachePromoteOpPerSec) ch <- prometheus.MustNewConstMetric(c.CachePromoteIOOps, prometheus.GaugeValue, stats.PGMap.CachePromoteOpPerSec)
var actualOsdMap osdMap var actualOsdMap osdMap
if c.version.IsAtLeast(Octopus) { if version.IsAtLeast(Octopus) {
if stats.OSDMap != nil { if stats.OSDMap != nil {
actualOsdMap = osdMap{ actualOsdMap = osdMap{
NumOSDs: stats.OSDMap["num_osds"].(float64), NumOSDs: stats.OSDMap["num_osds"].(float64),
@ -1031,7 +1030,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
activeMgr := 0 activeMgr := 0
standByMgrs := 0 standByMgrs := 0
if c.version.IsAtLeast(Octopus) { if version.IsAtLeast(Octopus) {
if stats.MgrMap.Available { if stats.MgrMap.Available {
activeMgr = 1 activeMgr = 1
} }
@ -1334,9 +1333,9 @@ func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel. // Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization. // It requires the caller to handle synchronization.
func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric) { func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster health metrics") c.logger.Debug("collecting cluster health metrics")
if err := c.collect(ch); err != nil { if err := c.collect(ch, version); err != nil {
c.logger.WithError(err).Error("error collecting cluster health metrics " + err.Error()) c.logger.WithError(err).Error("error collecting cluster health metrics " + err.Error())
} }

View File

@ -32,9 +32,8 @@ var versionRegexp = regexp.MustCompile(`ceph version (?P<version_tag>\d+\.\d+\.\
// to each monitor instance, there are various vector metrics we // to each monitor instance, there are various vector metrics we
// need to use. // need to use.
type MonitorCollector struct { type MonitorCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// TotalKBs display the total storage a given monitor node has. // TotalKBs display the total storage a given monitor node has.
TotalKBs *prometheus.GaugeVec TotalKBs *prometheus.GaugeVec
@ -96,9 +95,8 @@ func NewMonitorCollector(exporter *Exporter) *MonitorCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
return &MonitorCollector{ return &MonitorCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
TotalKBs: prometheus.NewGaugeVec( TotalKBs: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
@ -553,7 +551,7 @@ func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the given metrics from the Monitors and sends it to the prometheus // Collect extracts the given metrics from the Monitors and sends it to the prometheus
// channel. // channel.
func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric) { func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
m.logger.Debug("collecting ceph monitor metrics") m.logger.Debug("collecting ceph monitor metrics")
if err := m.collect(); err != nil { if err := m.collect(); err != nil {
m.logger.WithError(err).Error("error collecting ceph monitor metrics") m.logger.WithError(err).Error("error collecting ceph monitor metrics")

View File

@ -40,9 +40,8 @@ const (
// An important aspect of monitoring OSDs is to ensure that when the cluster is // An important aspect of monitoring OSDs is to ensure that when the cluster is
// up and running that all OSDs that are in the cluster are up and running, too // up and running that all OSDs that are in the cluster are up and running, too
type OSDCollector struct { type OSDCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// osdScrubCache holds the cache of previous PG scrubs // osdScrubCache holds the cache of previous PG scrubs
osdScrubCache map[int]int osdScrubCache map[int]int
@ -152,9 +151,6 @@ type OSDCollector struct {
OldestInactivePG prometheus.Gauge OldestInactivePG prometheus.Gauge
} }
// This ensures OSDCollector implements interface prometheus.Collector.
var _ prometheus.Collector = &OSDCollector{}
// NewOSDCollector creates an instance of the OSDCollector and instantiates the // NewOSDCollector creates an instance of the OSDCollector and instantiates the
// individual metrics that show information about the OSD. // individual metrics that show information about the OSD.
func NewOSDCollector(exporter *Exporter) *OSDCollector { func NewOSDCollector(exporter *Exporter) *OSDCollector {
@ -163,9 +159,8 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
osdLabels := []string{"osd", "device_class", "host", "rack", "root"} osdLabels := []string{"osd", "device_class", "host", "rack", "root"}
return &OSDCollector{ return &OSDCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
osdScrubCache: make(map[int]int), osdScrubCache: make(map[int]int),
osdLabelsCache: make(map[int64]*cephOSDLabel), osdLabelsCache: make(map[int64]*cephOSDLabel),
@ -1119,7 +1114,7 @@ func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided Prometheus channel. // Collect sends all the collected metrics to the provided Prometheus channel.
// It requires the caller to handle synchronization. // It requires the caller to handle synchronization.
func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) { func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
// Reset daemon specific metrics; daemons can leave the cluster // Reset daemon specific metrics; daemons can leave the cluster
o.CrushWeight.Reset() o.CrushWeight.Reset()
o.Depth.Reset() o.Depth.Reset()

View File

@ -32,9 +32,8 @@ const (
// PoolInfoCollector gives information about each pool that exists in a given // PoolInfoCollector gives information about each pool that exists in a given
// ceph cluster. // ceph cluster.
type PoolInfoCollector struct { type PoolInfoCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// PGNum contains the count of PGs allotted to a particular pool. // PGNum contains the count of PGs allotted to a particular pool.
PGNum *prometheus.GaugeVec PGNum *prometheus.GaugeVec
@ -75,9 +74,8 @@ func NewPoolInfoCollector(exporter *Exporter) *PoolInfoCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
return &PoolInfoCollector{ return &PoolInfoCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
PGNum: prometheus.NewGaugeVec( PGNum: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
@ -261,7 +259,7 @@ func (p *PoolInfoCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the // Collect extracts the current values of all the metrics and sends them to the
// prometheus channel. // prometheus channel.
func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric) { func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool metrics") p.logger.Debug("collecting pool metrics")
if err := p.collect(); err != nil { if err := p.collect(); err != nil {
p.logger.WithError(err).Error("error collecting pool metrics") p.logger.WithError(err).Error("error collecting pool metrics")

View File

@ -25,9 +25,8 @@ import (
// PoolUsageCollector displays statistics about each pool in the Ceph cluster. // PoolUsageCollector displays statistics about each pool in the Ceph cluster.
type PoolUsageCollector struct { type PoolUsageCollector struct {
conn Conn conn Conn
logger *logrus.Logger logger *logrus.Logger
version *Version
// UsedBytes tracks the amount of bytes currently allocated for the pool. This // UsedBytes tracks the amount of bytes currently allocated for the pool. This
// does not factor in the overcommitment made for individual images. // does not factor in the overcommitment made for individual images.
@ -80,9 +79,8 @@ func NewPoolUsageCollector(exporter *Exporter) *PoolUsageCollector {
labels["cluster"] = exporter.Cluster labels["cluster"] = exporter.Cluster
return &PoolUsageCollector{ return &PoolUsageCollector{
conn: exporter.Conn, conn: exporter.Conn,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use", UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use",
poolLabel, labels, poolLabel, labels,
@ -213,7 +211,7 @@ func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the // Collect extracts the current values of all the metrics and sends them to the
// prometheus channel. // prometheus channel.
func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) { func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool usage metrics") p.logger.Debug("collecting pool usage metrics")
if err := p.collect(ch); err != nil { if err := p.collect(ch); err != nil {
p.logger.WithError(err).Error("error collecting pool usage metrics") p.logger.WithError(err).Error("error collecting pool usage metrics")

View File

@ -155,7 +155,7 @@ func (c *RbdMirrorStatusCollector) Describe(ch chan<- *prometheus.Desc) {
} }
// Collect sends all the collected metrics Prometheus. // Collect sends all the collected metrics Prometheus.
func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) { func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
status, err := rbdMirrorStatus(c.config, c.user) status, err := rbdMirrorStatus(c.config, c.user)
if err != nil { if err != nil {
c.logger.WithError(err).Error("failed to run 'rbd mirror pool status'") c.logger.WithError(err).Error("failed to run 'rbd mirror pool status'")
@ -166,6 +166,7 @@ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
} }
c.RbdMirrorStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.Health)) c.RbdMirrorStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.Health))
c.version = version
if c.version.IsAtLeast(Pacific) { if c.version.IsAtLeast(Pacific) {
c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth)) c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))

View File

@ -76,7 +76,6 @@ type RGWCollector struct {
user string user string
background bool background bool
logger *logrus.Logger logger *logrus.Logger
version *Version
// ActiveTasks reports the number of (expired) RGW GC tasks // ActiveTasks reports the number of (expired) RGW GC tasks
ActiveTasks *prometheus.GaugeVec ActiveTasks *prometheus.GaugeVec
@ -101,7 +100,6 @@ func NewRGWCollector(exporter *Exporter, background bool) *RGWCollector {
config: exporter.Config, config: exporter.Config,
background: background, background: background,
logger: exporter.Logger, logger: exporter.Logger,
version: exporter.Version,
getRGWGCTaskList: rgwGetGCTaskList, getRGWGCTaskList: rgwGetGCTaskList,
ActiveTasks: prometheus.NewGaugeVec( ActiveTasks: prometheus.NewGaugeVec(
@ -219,7 +217,7 @@ func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel. // Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization. // It requires the caller to handle synchronization.
func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) { func (r *RGWCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
if !r.background { if !r.background {
r.logger.WithField("background", r.background).Debug("collecting RGW GC stats") r.logger.WithField("background", r.background).Debug("collecting RGW GC stats")
err := r.collect() err := r.collect()