mirror of https://github.com/digitalocean/ceph_exporter, synced 2024-12-17 20:55:16 +00:00
pass version to collectors when calling Collect()
This commit is contained in:
parent 69edc55596, commit ba15bf50a3
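This commit drops the per-collector version *Version struct field and instead passes the Ceph version into each Collect() call, so the Exporter resolves the version once per scrape and hands it down to every collector. Below is a minimal sketch of the resulting call shape; the versionedCollector interface is illustrative only (the commit itself dispatches over concrete types in the Exporter hunks further down), and the snippet assumes this package's existing Version type and Prometheus imports:

	// Hypothetical interface capturing the new Collect signature; not part of
	// this commit, which keeps a per-type switch in the Exporter instead.
	type versionedCollector interface {
		Describe(ch chan<- *prometheus.Desc)
		Collect(ch chan<- prometheus.Metric, version *Version)
	}

	// collectAll illustrates the intent: the version is supplied per scrape
	// rather than stored on each collector struct.
	func collectAll(cs []versionedCollector, ch chan<- prometheus.Metric, v *Version) {
		for _, c := range cs {
			c.Collect(ch, v)
		}
	}
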
@@ -30,9 +30,8 @@ const (
 // is growing or shrinking as a whole in order to zero in on the cause. The
 // pool specific stats are provided separately.
 type ClusterUsageCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// GlobalCapacity displays the total storage capacity of the cluster. This
 	// information is based on the actual no. of objects that are
@@ -55,9 +54,8 @@ func NewClusterUsageCollector(exporter *Exporter) *ClusterUsageCollector {
 	labels["cluster"] = exporter.Cluster
 
 	return &ClusterUsageCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		GlobalCapacity: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: cephNamespace,
@@ -106,7 +104,6 @@ func (c *ClusterUsageCollector) collect() error {
 
 		return err
 	}
 
 	stats := &cephClusterStats{}
 	if err := json.Unmarshal(buf, stats); err != nil {
 		return err
@@ -143,7 +140,7 @@ func (c *ClusterUsageCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect sends the metric values for each metric pertaining to the global
 // cluster usage over to the provided prometheus Metric channel.
-func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric) {
+func (c *ClusterUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	c.logger.Debug("collecting cluster usage metrics")
 	if err := c.collect(); err != nil {
 		c.logger.WithError(err).Error("error collecting cluster usage metrics")
@@ -31,9 +31,8 @@ var (
 // This is NOT the same as new_crash_reports, that only counts new reports in the past
 // two weeks as reported by 'ceph health'.
 type CrashesCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	crashReportsDesc *prometheus.Desc
 }
@@ -44,9 +43,8 @@ func NewCrashesCollector(exporter *Exporter) *CrashesCollector {
 	labels["cluster"] = exporter.Cluster
 
 	collector := &CrashesCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		crashReportsDesc: prometheus.NewDesc(
 			fmt.Sprintf("%s_crash_reports", cephNamespace),
@@ -106,7 +104,7 @@ func (c *CrashesCollector) Describe(ch chan<- *prometheus.Desc) {
 }
 
 // Collect sends all the collected metrics Prometheus.
-func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric) {
+func (c *CrashesCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	crashes, err := c.getCrashLs()
 	if err != nil {
 		c.logger.WithError(err).Error("failed to run 'ceph crash ls'")
@@ -226,7 +226,26 @@ func (exporter *Exporter) Describe(ch chan<- *prometheus.Desc) {
 	}
 
 	for _, cc := range exporter.cc {
-		cc.Describe(ch)
+		switch cc.(type) {
+		case *ClusterUsageCollector:
+			cc.(*ClusterUsageCollector).Describe(ch)
+		case *PoolUsageCollector:
+			cc.(*PoolUsageCollector).Describe(ch)
+		case *PoolInfoCollector:
+			cc.(*PoolInfoCollector).Describe(ch)
+		case *ClusterHealthCollector:
+			cc.(*ClusterHealthCollector).Describe(ch)
+		case *MonitorCollector:
+			cc.(*MonitorCollector).Describe(ch)
+		case *OSDCollector:
+			cc.(*OSDCollector).Describe(ch)
+		case *CrashesCollector:
+			cc.(*CrashesCollector).Describe(ch)
+		case *RbdMirrorStatusCollector:
+			cc.(*RbdMirrorStatusCollector).Describe(ch)
+		case *RGWCollector:
+			cc.(*RGWCollector).Describe(ch)
+		}
 	}
 }
 
@@ -250,6 +269,25 @@ func (exporter *Exporter) Collect(ch chan<- prometheus.Metric) {
 	}
 
 	for _, cc := range exporter.cc {
-		cc.Collect(ch)
+		switch cc.(type) {
+		case *ClusterUsageCollector:
+			cc.(*ClusterUsageCollector).Collect(ch, exporter.Version)
+		case *PoolUsageCollector:
+			cc.(*PoolUsageCollector).Collect(ch, exporter.Version)
+		case *PoolInfoCollector:
+			cc.(*PoolInfoCollector).Collect(ch, exporter.Version)
+		case *ClusterHealthCollector:
+			cc.(*ClusterHealthCollector).Collect(ch, exporter.Version)
+		case *MonitorCollector:
+			cc.(*MonitorCollector).Collect(ch, exporter.Version)
+		case *OSDCollector:
+			cc.(*OSDCollector).Collect(ch, exporter.Version)
+		case *CrashesCollector:
+			cc.(*CrashesCollector).Collect(ch, exporter.Version)
+		case *RbdMirrorStatusCollector:
+			cc.(*RbdMirrorStatusCollector).Collect(ch, exporter.Version)
+		case *RGWCollector:
+			cc.(*RGWCollector).Collect(ch, exporter.Version)
+		}
 	}
 }
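With the extra *Version parameter, the collectors no longer satisfy prometheus.Collector, which is why the Exporter now switches over each concrete collector type above. For illustration only, and assuming every registered collector implements the new signature, the same dispatch could be expressed with an anonymous interface assertion instead of the per-type switch:

	// Sketch under the assumption that all of exporter.cc implement
	// Collect(chan<- prometheus.Metric, *Version); the commit keeps the
	// explicit switch shown above.
	for _, cc := range exporter.cc {
		if vc, ok := cc.(interface {
			Collect(chan<- prometheus.Metric, *Version)
		}); ok {
			vc.Collect(ch, exporter.Version)
		}
	}
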
@@ -47,9 +47,8 @@ var (
 // It surfaces changes in the ceph parameters unlike data usage that ClusterUsageCollector
 // does.
 type ClusterHealthCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// healthChecksMap stores warnings and their criticality
 	healthChecksMap map[string]int
@@ -287,9 +286,8 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
 	labels["cluster"] = exporter.Cluster
 
 	collector := &ClusterHealthCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		healthChecksMap: map[string]int{
 			"AUTH_BAD_CAPS": 2,
@@ -558,13 +556,6 @@ func NewClusterHealthCollector(exporter *Exporter) *ClusterHealthCollector {
 		"notieragent": &collector.OSDMapFlagNoTierAgent,
 	}
 
-	if exporter.Version.IsAtLeast(Pacific) {
-		// pacific adds the DAEMON_OLD_VERSION health check
-		// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
-		// we'll interpret this is a critical warning (2)
-		collector.healthChecksMap["DAEMON_OLD_VERSION"] = 2
-	}
-
 	return collector
 }
 
@@ -724,7 +715,7 @@ type cephHealthStats struct {
 	} `json:"servicemap"`
 }
 
-func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
+func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric, version *Version) error {
 	cmd := c.cephUsageCommand(jsonFormat)
 	buf, _, err := c.conn.MonCommand(cmd)
 	if err != nil {
@@ -883,6 +874,14 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
 			}
 		}
 	}
+
+	if version.IsAtLeast(Pacific) {
+		// pacific adds the DAEMON_OLD_VERSION health check
+		// that indicates that multiple versions of Ceph have been running for longer than mon_warn_older_version_delay
+		// we'll interpret this is a critical warning (2)
+		c.healthChecksMap["DAEMON_OLD_VERSION"] = 2
+	}
+
 	if !mapEmpty {
 		if val, present := c.healthChecksMap[k]; present {
 			c.HealthStatusInterpreter.Set(float64(val))
@@ -991,7 +990,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
 	ch <- prometheus.MustNewConstMetric(c.CachePromoteIOOps, prometheus.GaugeValue, stats.PGMap.CachePromoteOpPerSec)
 
 	var actualOsdMap osdMap
-	if c.version.IsAtLeast(Octopus) {
+	if version.IsAtLeast(Octopus) {
 		if stats.OSDMap != nil {
 			actualOsdMap = osdMap{
 				NumOSDs: stats.OSDMap["num_osds"].(float64),
@@ -1031,7 +1030,7 @@ func (c *ClusterHealthCollector) collect(ch chan<- prometheus.Metric) error {
 
 	activeMgr := 0
 	standByMgrs := 0
-	if c.version.IsAtLeast(Octopus) {
+	if version.IsAtLeast(Octopus) {
 		if stats.MgrMap.Available {
 			activeMgr = 1
 		}
@@ -1334,9 +1333,9 @@ func (c *ClusterHealthCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect sends all the collected metrics to the provided prometheus channel.
 // It requires the caller to handle synchronization.
-func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric) {
+func (c *ClusterHealthCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	c.logger.Debug("collecting cluster health metrics")
-	if err := c.collect(ch); err != nil {
+	if err := c.collect(ch, version); err != nil {
 		c.logger.WithError(err).Error("error collecting cluster health metrics " + err.Error())
 	}
 
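With the struct field removed, the version-dependent health checks above are gated on the version argument (version.IsAtLeast(Octopus), version.IsAtLeast(Pacific)) rather than c.version. The sketch below shows one plausible shape for that comparison helper; the real Version type and the Octopus/Pacific values are defined elsewhere in this repository, so the fields and literals here are assumptions (Ceph Octopus is the 15.x release series, Pacific is 16.x):

	// Assumed layout of the exporter's Version type; only IsAtLeast, Octopus
	// and Pacific appear in this diff, the rest is illustrative.
	type Version struct{ Major, Minor, Patch int }

	var (
		Octopus = &Version{Major: 15}
		Pacific = &Version{Major: 16}
	)

	// IsAtLeast reports whether v is the given release or newer.
	func (v *Version) IsAtLeast(other *Version) bool {
		if v.Major != other.Major {
			return v.Major > other.Major
		}
		if v.Minor != other.Minor {
			return v.Minor > other.Minor
		}
		return v.Patch >= other.Patch
	}
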
@@ -32,9 +32,8 @@ var versionRegexp = regexp.MustCompile(`ceph version (?P<version_tag>\d+\.\d+\.\
 // to each monitor instance, there are various vector metrics we
 // need to use.
 type MonitorCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// TotalKBs display the total storage a given monitor node has.
 	TotalKBs *prometheus.GaugeVec
@@ -96,9 +95,8 @@ func NewMonitorCollector(exporter *Exporter) *MonitorCollector {
 	labels["cluster"] = exporter.Cluster
 
 	return &MonitorCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		TotalKBs: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
@@ -553,7 +551,7 @@ func (m *MonitorCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect extracts the given metrics from the Monitors and sends it to the prometheus
 // channel.
-func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric) {
+func (m *MonitorCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	m.logger.Debug("collecting ceph monitor metrics")
 	if err := m.collect(); err != nil {
 		m.logger.WithError(err).Error("error collecting ceph monitor metrics")
ceph/osd.go (15 changed lines)
@@ -40,9 +40,8 @@ const (
 // An important aspect of monitoring OSDs is to ensure that when the cluster is
 // up and running that all OSDs that are in the cluster are up and running, too
 type OSDCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// osdScrubCache holds the cache of previous PG scrubs
 	osdScrubCache map[int]int
@@ -152,9 +151,6 @@ type OSDCollector struct {
 	OldestInactivePG prometheus.Gauge
 }
 
-// This ensures OSDCollector implements interface prometheus.Collector.
-var _ prometheus.Collector = &OSDCollector{}
-
 // NewOSDCollector creates an instance of the OSDCollector and instantiates the
 // individual metrics that show information about the OSD.
 func NewOSDCollector(exporter *Exporter) *OSDCollector {
@@ -163,9 +159,8 @@ func NewOSDCollector(exporter *Exporter) *OSDCollector {
 	osdLabels := []string{"osd", "device_class", "host", "rack", "root"}
 
 	return &OSDCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		osdScrubCache:  make(map[int]int),
 		osdLabelsCache: make(map[int64]*cephOSDLabel),
@@ -1119,7 +1114,7 @@ func (o *OSDCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect sends all the collected metrics to the provided Prometheus channel.
 // It requires the caller to handle synchronization.
-func (o *OSDCollector) Collect(ch chan<- prometheus.Metric) {
+func (o *OSDCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	// Reset daemon specifc metrics; daemons can leave the cluster
 	o.CrushWeight.Reset()
 	o.Depth.Reset()
ceph/pool.go (12 changed lines)
@@ -32,9 +32,8 @@ const (
 // PoolInfoCollector gives information about each pool that exists in a given
 // ceph cluster.
 type PoolInfoCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// PGNum contains the count of PGs allotted to a particular pool.
 	PGNum *prometheus.GaugeVec
@@ -75,9 +74,8 @@ func NewPoolInfoCollector(exporter *Exporter) *PoolInfoCollector {
 	labels["cluster"] = exporter.Cluster
 
 	return &PoolInfoCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		PGNum: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
@@ -261,7 +259,7 @@ func (p *PoolInfoCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect extracts the current values of all the metrics and sends them to the
 // prometheus channel.
-func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric) {
+func (p *PoolInfoCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	p.logger.Debug("collecting pool metrics")
 	if err := p.collect(); err != nil {
 		p.logger.WithError(err).Error("error collecting pool metrics")
@@ -25,9 +25,8 @@ import (
 
 // PoolUsageCollector displays statistics about each pool in the Ceph cluster.
 type PoolUsageCollector struct {
-	conn    Conn
-	logger  *logrus.Logger
-	version *Version
+	conn   Conn
+	logger *logrus.Logger
 
 	// UsedBytes tracks the amount of bytes currently allocated for the pool. This
 	// does not factor in the overcommitment made for individual images.
@@ -80,9 +79,8 @@ func NewPoolUsageCollector(exporter *Exporter) *PoolUsageCollector {
 	labels["cluster"] = exporter.Cluster
 
 	return &PoolUsageCollector{
-		conn:    exporter.Conn,
-		logger:  exporter.Logger,
-		version: exporter.Version,
+		conn:   exporter.Conn,
+		logger: exporter.Logger,
 
 		UsedBytes: prometheus.NewDesc(fmt.Sprintf("%s_%s_used_bytes", cephNamespace, subSystem), "Capacity of the pool that is currently under use",
 			poolLabel, labels,
@@ -213,7 +211,7 @@ func (p *PoolUsageCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect extracts the current values of all the metrics and sends them to the
 // prometheus channel.
-func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric) {
+func (p *PoolUsageCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	p.logger.Debug("collecting pool usage metrics")
 	if err := p.collect(ch); err != nil {
 		p.logger.WithError(err).Error("error collecting pool usage metrics")
@@ -155,7 +155,7 @@ func (c *RbdMirrorStatusCollector) Describe(ch chan<- *prometheus.Desc) {
 }
 
 // Collect sends all the collected metrics Prometheus.
-func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
+func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	status, err := rbdMirrorStatus(c.config, c.user)
 	if err != nil {
 		c.logger.WithError(err).Error("failed to run 'rbd mirror pool status'")
@@ -166,6 +166,7 @@ func (c *RbdMirrorStatusCollector) Collect(ch chan<- prometheus.Metric) {
 	}
 
 	c.RbdMirrorStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.Health))
+	c.version = version
 
 	if c.version.IsAtLeast(Pacific) {
 		c.RbdMirrorDaemonStatus.Set(c.mirrorStatusStringToInt(rbdStatus.Summary.DaemonHealth))
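Unlike the other collectors, RbdMirrorStatusCollector keeps its version field and refreshes it from the Collect argument, so the existing c.version.IsAtLeast(Pacific) check keeps working unchanged. A trimmed sketch of that pattern, with the struct reduced to the one field relevant here:

	// Illustration only: store the version handed to Collect, then gate
	// version-specific metrics on the stored copy.
	type rbdMirrorSketch struct {
		version *Version
	}

	func (c *rbdMirrorSketch) Collect(ch chan<- prometheus.Metric, version *Version) {
		c.version = version
		if c.version.IsAtLeast(Pacific) {
			// emit Pacific-only metrics here
		}
	}
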
@@ -76,7 +76,6 @@ type RGWCollector struct {
 	user       string
 	background bool
 	logger     *logrus.Logger
-	version    *Version
 
 	// ActiveTasks reports the number of (expired) RGW GC tasks
 	ActiveTasks *prometheus.GaugeVec
@@ -101,7 +100,6 @@ func NewRGWCollector(exporter *Exporter, background bool) *RGWCollector {
 		config:           exporter.Config,
 		background:       background,
 		logger:           exporter.Logger,
-		version:          exporter.Version,
 		getRGWGCTaskList: rgwGetGCTaskList,
 
 		ActiveTasks: prometheus.NewGaugeVec(
@@ -219,7 +217,7 @@ func (r *RGWCollector) Describe(ch chan<- *prometheus.Desc) {
 
 // Collect sends all the collected metrics to the provided prometheus channel.
 // It requires the caller to handle synchronization.
-func (r *RGWCollector) Collect(ch chan<- prometheus.Metric) {
+func (r *RGWCollector) Collect(ch chan<- prometheus.Metric, version *Version) {
 	if !r.background {
 		r.logger.WithField("background", r.background).Debug("collecting RGW GC stats")
 		err := r.collect()