From ab0e9d64d3757aa3e1d5291470b44d1a830bdccc Mon Sep 17 00:00:00 2001
From: Calin Cristian Andrei
Date: Wed, 19 Oct 2016 09:45:05 +0000
Subject: [PATCH] add support for osd usage variance and pool raw bytes, dirty objects, and read + write bytes

---
 collectors/osd.go        | 21 +++++++++++
 collectors/pool_usage.go | 77 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/collectors/osd.go b/collectors/osd.go
index 5f547fe..95fc22d 100644
--- a/collectors/osd.go
+++ b/collectors/osd.go
@@ -37,6 +37,9 @@ type OSDCollector struct {
 	// Utilization displays current utilization of the OSD
 	Utilization *prometheus.GaugeVec
 
+	// Variance displays the current variance of the OSD's utilization from the average utilization
+	Variance *prometheus.GaugeVec
+
 	// Pgs displays total no. of placement groups in the OSD.
 	// Available in Ceph Jewel version.
 	Pgs *prometheus.GaugeVec
@@ -135,6 +138,15 @@ func NewOSDCollector(conn Conn) *OSDCollector {
 			[]string{"osd"},
 		),
 
+		Variance: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: cephNamespace,
+				Name:      "osd_variance",
+				Help:      "OSD Variance",
+			},
+			[]string{"osd"},
+		),
+
 		Pgs: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: cephNamespace,
@@ -222,6 +234,7 @@ func (o *OSDCollector) collectorList() []prometheus.Collector {
 		o.UsedBytes,
 		o.AvailBytes,
 		o.Utilization,
+		o.Variance,
 		o.Pgs,
 		o.TotalBytes,
 		o.TotalUsedBytes,
@@ -244,6 +257,7 @@ type cephOSDDF struct {
 		UsedKB      json.Number `json:"kb_used"`
 		AvailKB     json.Number `json:"kb_avail"`
 		Utilization json.Number `json:"utilization"`
+		Variance    json.Number `json:"var"`
 		Pgs         json.Number `json:"pgs"`
 	} `json:"nodes"`
 
@@ -339,6 +353,13 @@ func (o *OSDCollector) collect() error {
 
 		o.Utilization.WithLabelValues(node.Name).Set(util)
 
+		variance, err := node.Variance.Float64()
+		if err != nil {
+			return err
+		}
+
+		o.Variance.WithLabelValues(node.Name).Set(variance)
+
 		pgs, err := node.Pgs.Float64()
 		if err != nil {
 			continue
diff --git a/collectors/pool_usage.go b/collectors/pool_usage.go
index 2764e25..7d543b0 100644
--- a/collectors/pool_usage.go
+++ b/collectors/pool_usage.go
@@ -31,6 +31,10 @@ type PoolUsageCollector struct {
 	// does not factor in the overcommitment made for individual images.
 	UsedBytes *prometheus.GaugeVec
 
+	// RawUsedBytes tracks the amount of raw bytes currently used for the pool. This
+	// factors in the replication factor (size) of the pool.
+	RawUsedBytes *prometheus.GaugeVec
+
 	// MaxAvail tracks the amount of bytes currently free for the pool,
 	// which depends on the replication settings for the pool in question.
 	MaxAvail *prometheus.GaugeVec
@@ -38,11 +42,22 @@ type PoolUsageCollector struct {
 	// Objects shows the no. of RADOS objects created within the pool.
 	Objects *prometheus.GaugeVec
 
+	// DirtyObjects shows the no. of RADOS dirty objects in a cache-tier pool;
+	// this doesn't make sense in a regular pool, see:
+	// http://lists.ceph.com/pipermail/ceph-users-ceph.com/2015-April/000557.html
+	DirtyObjects *prometheus.GaugeVec
+
 	// ReadIO tracks the read IO calls made for the images within each pool.
 	ReadIO *prometheus.CounterVec
 
+	// ReadBytes tracks the read throughput made for the images within each pool.
+	ReadBytes *prometheus.CounterVec
+
 	// WriteIO tracks the write IO calls made for the images within each pool.
 	WriteIO *prometheus.CounterVec
+
+	// WriteBytes tracks the write throughput made for the images within each pool.
+	WriteBytes *prometheus.CounterVec
 }
 
 // NewPoolUsageCollector creates a new instance of PoolUsageCollector and returns
@@ -64,6 +79,15 @@ func NewPoolUsageCollector(conn Conn) *PoolUsageCollector {
 			},
 			poolLabel,
 		),
+		RawUsedBytes: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: cephNamespace,
+				Subsystem: subSystem,
+				Name:      "raw_used_bytes",
+				Help:      "Raw capacity of the pool that is currently under use; this factors in the replication factor (size) of the pool",
+			},
+			poolLabel,
+		),
 		MaxAvail: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: cephNamespace,
@@ -82,6 +106,15 @@ func NewPoolUsageCollector(conn Conn) *PoolUsageCollector {
 			},
 			poolLabel,
 		),
+		DirtyObjects: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: cephNamespace,
+				Subsystem: subSystem,
+				Name:      "dirty_objects_total",
+				Help:      "Total no. of dirty objects in a cache-tier pool",
+			},
+			poolLabel,
+		),
 		ReadIO: prometheus.NewCounterVec(
 			prometheus.CounterOpts{
 				Namespace: cephNamespace,
@@ -91,6 +124,15 @@ func NewPoolUsageCollector(conn Conn) *PoolUsageCollector {
 			},
 			poolLabel,
 		),
+		ReadBytes: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace: cephNamespace,
+				Subsystem: subSystem,
+				Name:      "read_bytes_total",
+				Help:      "Total read throughput the pool has been subject to",
+			},
+			poolLabel,
+		),
 		WriteIO: prometheus.NewCounterVec(
 			prometheus.CounterOpts{
 				Namespace: cephNamespace,
@@ -100,16 +142,29 @@ func NewPoolUsageCollector(conn Conn) *PoolUsageCollector {
 			},
 			poolLabel,
 		),
+		WriteBytes: prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Namespace: cephNamespace,
+				Subsystem: subSystem,
+				Name:      "write_bytes_total",
+				Help:      "Total write throughput the pool has been subject to",
+			},
+			poolLabel,
+		),
 	}
 }
 
 func (p *PoolUsageCollector) collectorList() []prometheus.Collector {
 	return []prometheus.Collector{
 		p.UsedBytes,
+		p.RawUsedBytes,
 		p.MaxAvail,
 		p.Objects,
+		p.DirtyObjects,
 		p.ReadIO,
+		p.ReadBytes,
 		p.WriteIO,
+		p.WriteBytes,
 	}
 }
 
@@ -118,11 +173,15 @@ type cephPoolStats struct {
 		Name string `json:"name"`
 		ID   int    `json:"id"`
 		Stats struct {
-			BytesUsed float64 `json:"bytes_used"`
-			MaxAvail  float64 `json:"max_avail"`
-			Objects   float64 `json:"objects"`
-			Read      float64 `json:"rd"`
-			Write     float64 `json:"wr"`
+			BytesUsed    float64 `json:"bytes_used"`
+			RawBytesUsed float64 `json:"raw_bytes_used"`
+			MaxAvail     float64 `json:"max_avail"`
+			Objects      float64 `json:"objects"`
+			DirtyObjects float64 `json:"dirty"`
+			ReadIO       float64 `json:"rd"`
+			ReadBytes    float64 `json:"rd_bytes"`
+			WriteIO      float64 `json:"wr"`
+			WriteBytes   float64 `json:"wr_bytes"`
 		} `json:"stats"`
 	} `json:"pools"`
 }
@@ -145,10 +204,14 @@ func (p *PoolUsageCollector) collect() error {
 
 	for _, pool := range stats.Pools {
 		p.UsedBytes.WithLabelValues(pool.Name).Set(pool.Stats.BytesUsed)
+		p.RawUsedBytes.WithLabelValues(pool.Name).Set(pool.Stats.RawBytesUsed)
 		p.MaxAvail.WithLabelValues(pool.Name).Set(pool.Stats.MaxAvail)
 		p.Objects.WithLabelValues(pool.Name).Set(pool.Stats.Objects)
-		p.ReadIO.WithLabelValues(pool.Name).Set(pool.Stats.Read)
-		p.WriteIO.WithLabelValues(pool.Name).Set(pool.Stats.Write)
+		p.DirtyObjects.WithLabelValues(pool.Name).Set(pool.Stats.DirtyObjects)
+		p.ReadIO.WithLabelValues(pool.Name).Set(pool.Stats.ReadIO)
+		p.ReadBytes.WithLabelValues(pool.Name).Set(pool.Stats.ReadBytes)
+		p.WriteIO.WithLabelValues(pool.Name).Set(pool.Stats.WriteIO)
+		p.WriteBytes.WithLabelValues(pool.Name).Set(pool.Stats.WriteBytes)
 	}
 
 	return nil
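For reference, a minimal, self-contained sketch (not part of the patch) of how the new pool stats fields decode, reusing the JSON tags added to cephPoolStats above. The struct below is a trimmed stand-in rather than the collector's actual type, and the sample document and its values are invented; real input comes from the cluster's df output in JSON form.

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // poolStats mirrors only the fields this patch adds to cephPoolStats.
    type poolStats struct {
    	Pools []struct {
    		Name  string `json:"name"`
    		Stats struct {
    			RawBytesUsed float64 `json:"raw_bytes_used"`
    			DirtyObjects float64 `json:"dirty"`
    			ReadBytes    float64 `json:"rd_bytes"`
    			WriteBytes   float64 `json:"wr_bytes"`
    		} `json:"stats"`
    	} `json:"pools"`
    }

    func main() {
    	// Made-up sample payload for illustration only.
    	buf := []byte(`{"pools":[{"name":"rbd","stats":{"raw_bytes_used":3072,"dirty":2,"rd_bytes":4096,"wr_bytes":8192}}]}`)

    	var st poolStats
    	if err := json.Unmarshal(buf, &st); err != nil {
    		panic(err)
    	}
    	for _, pool := range st.Pools {
    		// These are the values the collector would feed into the new metrics.
    		fmt.Printf("%s raw_used=%.0f dirty=%.0f rd_bytes=%.0f wr_bytes=%.0f\n",
    			pool.Name, pool.Stats.RawBytesUsed, pool.Stats.DirtyObjects,
    			pool.Stats.ReadBytes, pool.Stats.WriteBytes)
    	}
    }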