pass version to collectors when calling Collect()

Alex Marangone 2023-02-14 11:10:54 -08:00
parent 69edc55596
commit ba15bf50a3
10 changed files with 90 additions and 70 deletions
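Summary: until this change, every collector resolved the Ceph version once in its constructor and cached it in a version field. This commit deletes that field and its constructor assignment from each collector, and instead has the exporter hand its current *Version to Collect() on every scrape, so version-gated metrics track a cluster that is upgraded while the exporter is running. The signature change, repeated across all the collectors (shown here for ClusterUsageCollector, taken directly from the diff below):

// Before: version cached on the struct at construction time.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric)

// After: the exporter passes its current *Version on each scrape.
func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version)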

View File

@@ -32,7 +32,6 @@ const (
type ClusterUsageCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// GlobalCapacity displays the total storage capacity of the cluster. This
// information is based on the actual no. of objects that are
@@ -57,7 +56,6 @@ func NewClusterUsageCollector(exporter *Exporter) *ClusterUsageCollector {
return &ClusterUsageCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: cephNamespace,
@@ -106,7 +104,6 @@ func (c *ClusterUsageCollector) collect() error {
return err
}
stats := &cephClusterStats{}
if err := json.Unmarshal(buf, stats); err != nil {
return err
@@ -143,7 +140,7 @@ func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends the metric values for each metric pertaining to the global
// cluster usage over to the provided prometheus Metric channel.
- func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster usage metrics")
if err := c.collect(); err != nil {
c.logger.WithError(err).Error("error collecting cluster usage metrics")

View File

@@ -33,7 +33,6 @@ var (
type CrashesCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
crashReportsDesc *prometheus.Desc
}
@@ -46,7 +45,6 @@ func NewCrashesCollector(exporter *Exporter) *CrashesCollector {
collector := &CrashesCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
crashReportsDesc: prometheus.NewDesc(
fmt.Sprintf("%s_crash_reports", cephNamespace),
@@ -106,7 +104,7 @@ func (c *CrashesCollector) Describe(ch chan<- *prometheus.Desc) {
}
// Collect sends all the collected metrics to Prometheus.
- func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
crashes, err := c.getCrashLs()
if err != nil {
c.logger.WithError(err).Error("failed to run 'ceph crash ls'")

View File

@@ -226,7 +226,26 @@ func (exporter *Exporter) Describe(ch chan<- *prometheus.Desc) {
}
for _, cc := range exporter.cc {
- cc.Describe(ch)
+ switch cc.(type) {
+ case *ClusterUsageCollector:
+ cc.(*ClusterUsageCollector).Describe(ch)
+ case *PoolUsageCollector:
+ cc.(*PoolUsageCollector).Describe(ch)
+ case *PoolInfoCollector:
+ cc.(*PoolInfoCollector).Describe(ch)
+ case *ClusterHealthCollector:
+ cc.(*ClusterHealthCollector).Describe(ch)
+ case *MonitorCollector:
+ cc.(*MonitorCollector).Describe(ch)
+ case *OSDCollector:
+ cc.(*OSDCollector).Describe(ch)
+ case *CrashesCollector:
+ cc.(*CrashesCollector).Describe(ch)
+ case *RbdMirrorStatusCollector:
+ cc.(*RbdMirrorStatusCollector).Describe(ch)
+ case *RGWCollector:
+ cc.(*RGWCollector).Describe(ch)
+ }
}
}
@@ -250,6 +269,25 @@ func (exporter *Exporter) Collect(ch chan<- prometheus.Metric) {
}
for _, cc := range exporter.cc {
- cc.Collect(ch)
+ switch cc.(type) {
+ case *ClusterUsageCollector:
+ cc.(*ClusterUsageCollector).Collect(ch, exporter.Version)
+ case *PoolUsageCollector:
+ cc.(*PoolUsageCollector).Collect(ch, exporter.Version)
+ case *PoolInfoCollector:
+ cc.(*PoolInfoCollector).Collect(ch, exporter.Version)
+ case *ClusterHealthCollector:
+ cc.(*ClusterHealthCollector).Collect(ch, exporter.Version)
+ case *MonitorCollector:
+ cc.(*MonitorCollector).Collect(ch, exporter.Version)
+ case *OSDCollector:
+ cc.(*OSDCollector).Collect(ch, exporter.Version)
+ case *CrashesCollector:
+ cc.(*CrashesCollector).Collect(ch, exporter.Version)
+ case *RbdMirrorStatusCollector:
+ cc.(*RbdMirrorStatusCollector).Collect(ch, exporter.Version)
+ case *RGWCollector:
+ cc.(*RGWCollector).Collect(ch, exporter.Version)
+ }
}
}
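Editor's note: both switches above call the same two methods on every arm; they exist because, with the extra parameter, the collectors no longer satisfy prometheus.Collector, so exporter.cc presumably now holds a less specific element type. A small interface would collapse each switch to a single call. A minimal sketch going beyond what the diff shows (the name versionedCollector is invented here):

// Hypothetical interface capturing the new collector contract; every
// collector touched by this commit already has exactly these two methods.
type versionedCollector interface {
	Describe(ch chan<- *prometheus.Desc)
	Collect(ch chan<- prometheus.Metric, version *Version)
}

// With exporter.cc declared as []versionedCollector, each loop body
// reduces to a direct call:
for _, cc := range exporter.cc {
	cc.Collect(ch, exporter.Version)
}

Even if the switch is kept, the idiomatic form switch v := cc.(type) would bind the concrete value once per arm and avoid the repeated type assertions.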

View File

@@ -49,7 +49,6 @@ var (
type ClusterHealthCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// healthChecksMap stores warnings and their criticality
healthChecksMap map[string]int
@@ -289,7 +288,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
collector := &ClusterHealthCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
healthChecksMap: map[string]int{
"AUTH_BAD_CAPS": 2,
@@ -558,13 +556,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
"notieragent": &collector.OSDMapFlagNoTierAgent,
}
- if exporter.Version.IsAtLeast(Pacific) {
- // pacific adds the DAEMON_OLD_VERSION health check
- // that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
- // we'll interpret this as a critical warning (2)
- collector.healthChecksMap["DAEMON_OLD_VERSION"] = 2
- }
return collector
}
@@ -724,7 +715,7 @@ type cephHealthStats struct {
} `json:"servicemap"`
}
- func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
+ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric, version *Version) error {
cmd := c.cephUsageCommand(jsonFormat)
buf, _, err := c.conn.MonCommand(cmd)
if err != nil {
@@ -883,6 +874,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
}
}
}
+ if version.IsAtLeast(Pacific) {
+ // pacific adds the DAEMON_OLD_VERSION health check
+ // that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
+ // we'll interpret this as a critical warning (2)
+ c.healthChecksMap["DAEMON_OLD_VERSION"] = 2
+ }
if !mapEmpty {
if val, present := c.healthChecksMap[k]; present {
c.HealthStatusInterpreter.Set(float64(val))
@@ -991,7 +990,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.CachePromoteIOOps, prometheus.GaugeValue, stats.PGMap.CachePromoteOpPerSec)
var actualOsdMap osdMap
- if c.version.IsAtLeast(Octopus) {
+ if version.IsAtLeast(Octopus) {
if stats.OSDMap != nil {
actualOsdMap = osdMap{
NumOSDs: stats.OSDMap["num_osds"].(float64),
@@ -1031,7 +1030,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
activeMgr := 0
standByMgrs := 0
- if c.version.IsAtLeast(Octopus) {
+ if version.IsAtLeast(Octopus) {
if stats.MgrMap.Available {
activeMgr = 1
}
@@ -1334,9 +1333,9 @@ func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization.
- func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
c.logger.Debug("collecting cluster health metrics")
- if err := c.collect(ch); err != nil {
+ if err := c.collect(ch, version); err != nil {
c.logger.WithError(err).Error("error collecting cluster health metrics " + err.Error())
}
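The Pacific block above moves from the constructor into collect(), so the DAEMON_OLD_VERSION entry is now registered against the version passed in at scrape time rather than the one seen at startup, and the two Octopus checks switch from the cached c.version to the same parameter. The Version type and its named releases are not part of this diff; a hypothetical sketch consistent with how they are used here (Ceph Octopus is the 15.x series, Pacific is 16.x):

// Hypothetical sketch; the repo's actual Version type may differ.
type Version struct {
	Major, Minor, Patch int
}

// Named releases referenced in the diff.
var (
	Octopus = &Version{Major: 15}
	Pacific = &Version{Major: 16}
)

// IsAtLeast reports whether v is the given release or newer.
func (v *Version) IsAtLeast(other *Version) bool {
	if v.Major != other.Major {
		return v.Major > other.Major
	}
	if v.Minor != other.Minor {
		return v.Minor > other.Minor
	}
	return v.Patch >= other.Patch
}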

View File

@@ -34,7 +34,6 @@ var versionRegexp = regexp.MustCompile(`ceph version (?P<version_tag>\d+\.\d+\.\
type MonitorCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// TotalKBs display the total storage a given monitor node has.
TotalKBs *prometheus.GaugeVec
@@ -98,7 +97,6 @@ func NewMonitorCollector(exporter *Exporter) *MonitorCollector {
return &MonitorCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
TotalKBs: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@@ -553,7 +551,7 @@ func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the given metrics from the Monitors and sends them to the prometheus
// channel.
- func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric) {
+ func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
m.logger.Debug("collecting ceph monitor metrics")
if err := m.collect(); err != nil {
m.logger.WithError(err).Error("error collecting ceph monitor metrics")

View File

@@ -42,7 +42,6 @@ const (
type OSDCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// osdScrubCache holds the cache of previous PG scrubs
osdScrubCache map[int]int
@@ -152,9 +151,6 @@ type OSDCollector struct {
OldestInactivePG prometheus.Gauge
}
- // This ensures OSDCollector implements interface prometheus.Collector.
- var _ prometheus.Collector = &OSDCollector{}
// NewOSDCollector creates an instance of the OSDCollector and instantiates the
// individual metrics that show information about the OSD.
func NewOSDCollector(exporter *Exporter) *OSDCollector {
@@ -165,7 +161,6 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
return &OSDCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
osdScrubCache: make(map[int]int),
osdLabelsCache: make(map[int64]*cephOSDLabel),
@@ -1119,7 +1114,7 @@ func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided Prometheus channel.
// It requires the caller to handle synchronization.
- func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) {
+ func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
// Reset daemon specific metrics; daemons can leave the cluster
o.CrushWeight.Reset()
o.Depth.Reset()
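The deleted var _ prometheus.Collector = &OSDCollector{} line above is Go's standard compile-time interface check; it has to go because the new Collect signature no longer matches prometheus.Collector. If an interface along the lines of the versionedCollector sketched earlier were introduced, the same guard could be restated against it:

// Compile-time assertion against the hypothetical versionedCollector
// interface; the build fails if a collector's signatures drift.
var _ versionedCollector = &OSDCollector{}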

View File

@@ -34,7 +34,6 @@ const (
type PoolInfoCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// PGNum contains the count of PGs allotted to a particular pool.
PGNum *prometheus.GaugeVec
@@ -77,7 +76,6 @@ func NewPoolInfoCollector(exporter *Exporter) *PoolInfoCollector {
return &PoolInfoCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
PGNum: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
@@ -261,7 +259,7 @@ func (p *PoolInfoCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the
// prometheus channel.
- func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric) {
+ func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool metrics")
if err := p.collect(); err != nil {
p.logger.WithError(err).Error("error collecting pool metrics")

View File

@@ -27,7 +27,6 @@ import (
type PoolUsageCollector struct {
conn Conn
logger *logrus.Logger
- version *Version
// UsedBytes tracks the amount of bytes currently allocated for the pool. This
// does not factor in the overcommitment made for individual images.
@@ -82,7 +81,6 @@ func NewPoolUsageCollector(exporter *Exporter) *PoolUsageCollector {
return &PoolUsageCollector{
conn: exporter.Conn,
logger: exporter.Logger,
- version: exporter.Version,
UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use",
poolLabel, labels,
@@ -213,7 +211,7 @@ func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect extracts the current values of all the metrics and sends them to the
// prometheus channel.
- func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) {
+ func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
p.logger.Debug("collecting pool usage metrics")
if err := p.collect(ch); err != nil {
p.logger.WithError(err).Error("error collecting pool usage metrics")

View File

@@ -155,7 +155,7 @@ func (c *RbdMirrorStatusCollector) Describe(ch chan<- *prometheus.Desc) {
}
// Collect sends all the collected metrics to Prometheus.
- func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
+ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
status, err := rbdMirrorStatus(c.config, c.user)
if err != nil {
c.logger.WithError(err).Error("failed to run 'rbd mirror pool status'")
@@ -166,6 +166,7 @@ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
}
c.RbdMirrorStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.Health))
+ c.version = version
if c.version.IsAtLeast(Pacific) {
c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))
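Note that RbdMirrorStatusCollector is the one collector that keeps its version field: instead of deleting it, the commit refreshes it from the argument (c.version = version), so any other method still reading c.version sees the value from the latest scrape. A hypothetical alternative matching the rest of the commit would be to thread the parameter through directly (the helper and the rbdMirrorPoolStatus type name are assumptions here, not part of the diff):

// Hypothetical: drop the cached field and pass the version down instead.
func (c *RbdMirrorStatusCollector) collectDaemonHealth(version *Version, rbdStatus rbdMirrorPoolStatus) {
	if version.IsAtLeast(Pacific) {
		c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))
	}
}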

View File

@@ -76,7 +76,6 @@ type RGWCollector struct {
user string
background bool
logger *logrus.Logger
- version *Version
// ActiveTasks reports the number of (expired) RGW GC tasks
ActiveTasks *prometheus.GaugeVec
@@ -101,7 +100,6 @@ func NewRGWCollector(exporter *Exporter, background bool) *RGWCollector {
config: exporter.Config,
background: background,
logger: exporter.Logger,
- version: exporter.Version,
getRGWGCTaskList: rgwGetGCTaskList,
ActiveTasks: prometheus.NewGaugeVec(
@@ -219,7 +217,7 @@ func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect sends all the collected metrics to the provided prometheus channel.
// It requires the caller to handle synchronization.
- func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) {
+ func (r *RGWCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
if !r.background {
r.logger.WithField("background", r.background).Debug("collecting RGW GC stats")
err := r.collect()
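Editor's note: the Exporter's own Describe(ch chan<- *prometheus.Desc) and Collect(ch chan<- prometheus.Metric) signatures are untouched by this commit, so the Exporter itself still satisfies prometheus.Collector and existing registration code keeps working; only the inner collectors diverge from the standard interface. A minimal usage sketch (how the exporter value is constructed is not shown in this diff and is assumed):

// Registration is unaffected: the exporter still implements
// prometheus.Collector even though its child collectors do not.
reg := prometheus.NewRegistry()
reg.MustRegister(exporter)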