// Copyright 2017 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build !nobcache package collector import ( "fmt" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs/bcache" "gopkg.in/alecthomas/kingpin.v2" ) var ( priorityStats = kingpin.Flag("collector.bcache.priorityStats", "Expose expensive priority stats.").Bool() ) func init() { registerCollector("bcache", defaultEnabled, NewBcacheCollector) } // A bcacheCollector is a Collector which gathers metrics from Linux bcache. type bcacheCollector struct { fs bcache.FS logger log.Logger } // NewBcacheCollector returns a newly allocated bcacheCollector. // It exposes a number of Linux bcache statistics. func NewBcacheCollector(logger log.Logger) (Collector, error) { fs, err := bcache.NewFS(*sysPath) if err != nil { return nil, fmt.Errorf("failed to open sysfs: %w", err) } return &bcacheCollector{ fs: fs, logger: logger, }, nil } // Update reads and exposes bcache stats. // It implements the Collector interface. func (c *bcacheCollector) Update(ch chan<- prometheus.Metric) error { var stats []*bcache.Stats var err error if *priorityStats { stats, err = c.fs.Stats() } else { stats, err = c.fs.StatsWithoutPriority() } if err != nil { return fmt.Errorf("failed to retrieve bcache stats: %w", err) } for _, s := range stats { c.updateBcacheStats(ch, s) } return nil } type bcacheMetric struct { name string desc string value float64 metricType prometheus.ValueType extraLabel []string extraLabelValue string } func bcachePeriodStatsToMetric(ps *bcache.PeriodStats, labelValue string) []bcacheMetric { label := []string{"backing_device"} metrics := []bcacheMetric{ { name: "bypassed_bytes_total", desc: "Amount of IO (both reads and writes) that has bypassed the cache.", value: float64(ps.Bypassed), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_hits_total", desc: "Hits counted per individual IO as bcache sees them.", value: float64(ps.CacheHits), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_misses_total", desc: "Misses counted per individual IO as bcache sees them.", value: float64(ps.CacheMisses), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_bypass_hits_total", desc: "Hits for IO intended to skip the cache.", value: float64(ps.CacheBypassHits), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_bypass_misses_total", desc: "Misses for IO intended to skip the cache.", value: float64(ps.CacheBypassMisses), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_miss_collisions_total", desc: "Instances where data insertion from cache miss raced with write (data already present).", value: float64(ps.CacheMissCollisions), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, { name: "cache_readaheads_total", desc: "Count of times readahead occurred.", value: float64(ps.CacheReadaheads), metricType: prometheus.CounterValue, extraLabel: label, extraLabelValue: labelValue, }, } return metrics } // UpdateBcacheStats collects statistics for one bcache ID. func (c *bcacheCollector) updateBcacheStats(ch chan<- prometheus.Metric, s *bcache.Stats) { const ( subsystem = "bcache" ) var ( devLabel = []string{"uuid"} allMetrics []bcacheMetric metrics []bcacheMetric ) allMetrics = []bcacheMetric{ // metrics in /sys/fs/bcache// { name: "average_key_size_sectors", desc: "Average data per key in the btree (sectors).", value: float64(s.Bcache.AverageKeySize), metricType: prometheus.GaugeValue, }, { name: "btree_cache_size_bytes", desc: "Amount of memory currently used by the btree cache.", value: float64(s.Bcache.BtreeCacheSize), metricType: prometheus.GaugeValue, }, { name: "cache_available_percent", desc: "Percentage of cache device without dirty data, usable for writeback (may contain clean cached data).", value: float64(s.Bcache.CacheAvailablePercent), metricType: prometheus.GaugeValue, }, { name: "congested", desc: "Congestion.", value: float64(s.Bcache.Congested), metricType: prometheus.GaugeValue, }, { name: "root_usage_percent", desc: "Percentage of the root btree node in use (tree depth increases if too high).", value: float64(s.Bcache.RootUsagePercent), metricType: prometheus.GaugeValue, }, { name: "tree_depth", desc: "Depth of the btree.", value: float64(s.Bcache.TreeDepth), metricType: prometheus.GaugeValue, }, // metrics in /sys/fs/bcache//internal/ { name: "active_journal_entries", desc: "Number of journal entries that are newer than the index.", value: float64(s.Bcache.Internal.ActiveJournalEntries), metricType: prometheus.GaugeValue, }, { name: "btree_nodes", desc: "Total nodes in the btree.", value: float64(s.Bcache.Internal.BtreeNodes), metricType: prometheus.GaugeValue, }, { name: "btree_read_average_duration_seconds", desc: "Average btree read duration.", value: float64(s.Bcache.Internal.BtreeReadAverageDurationNanoSeconds) * 1e-9, metricType: prometheus.GaugeValue, }, { name: "cache_read_races_total", desc: "Counts instances where while data was being read from the cache, the bucket was reused and invalidated - i.e. where the pointer was stale after the read completed.", value: float64(s.Bcache.Internal.CacheReadRaces), metricType: prometheus.CounterValue, }, } for _, bdev := range s.Bdevs { // metrics in /sys/fs/bcache/// metrics = []bcacheMetric{ { name: "dirty_data_bytes", desc: "Amount of dirty data for this backing device in the cache.", value: float64(bdev.DirtyData), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, { name: "dirty_target_bytes", desc: "Current dirty data target threshold for this backing device in bytes.", value: float64(bdev.WritebackRateDebug.Target), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, { name: "writeback_rate", desc: "Current writeback rate for this backing device in bytes.", value: float64(bdev.WritebackRateDebug.Rate), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, { name: "writeback_rate_proportional_term", desc: "Current result of proportional controller, part of writeback rate", value: float64(bdev.WritebackRateDebug.Proportional), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, { name: "writeback_rate_integral_term", desc: "Current result of integral controller, part of writeback rate", value: float64(bdev.WritebackRateDebug.Integral), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, { name: "writeback_change", desc: "Last writeback rate change step for this backing device.", value: float64(bdev.WritebackRateDebug.Change), metricType: prometheus.GaugeValue, extraLabel: []string{"backing_device"}, extraLabelValue: bdev.Name, }, } allMetrics = append(allMetrics, metrics...) // metrics in /sys/fs/bcache///stats_total metrics := bcachePeriodStatsToMetric(&bdev.Total, bdev.Name) allMetrics = append(allMetrics, metrics...) } for _, cache := range s.Caches { metrics = []bcacheMetric{ // metrics in /sys/fs/bcache/// { name: "io_errors", desc: "Number of errors that have occurred, decayed by io_error_halflife.", value: float64(cache.IOErrors), metricType: prometheus.GaugeValue, extraLabel: []string{"cache_device"}, extraLabelValue: cache.Name, }, { name: "metadata_written_bytes_total", desc: "Sum of all non data writes (btree writes and all other metadata).", value: float64(cache.MetadataWritten), metricType: prometheus.CounterValue, extraLabel: []string{"cache_device"}, extraLabelValue: cache.Name, }, { name: "written_bytes_total", desc: "Sum of all data that has been written to the cache.", value: float64(cache.Written), metricType: prometheus.CounterValue, extraLabel: []string{"cache_device"}, extraLabelValue: cache.Name, }, } if *priorityStats { // metrics in /sys/fs/bcache///priority_stats priorityStatsMetrics := []bcacheMetric{ { name: "priority_stats_unused_percent", desc: "The percentage of the cache that doesn't contain any data.", value: float64(cache.Priority.UnusedPercent), metricType: prometheus.GaugeValue, extraLabel: []string{"cache_device"}, extraLabelValue: cache.Name, }, { name: "priority_stats_metadata_percent", desc: "Bcache's metadata overhead.", value: float64(cache.Priority.MetadataPercent), metricType: prometheus.GaugeValue, extraLabel: []string{"cache_device"}, extraLabelValue: cache.Name, }, } metrics = append(metrics, priorityStatsMetrics...) } allMetrics = append(allMetrics, metrics...) } for _, m := range allMetrics { labels := append(devLabel, m.extraLabel...) desc := prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, m.name), m.desc, labels, nil, ) labelValues := []string{s.Name} if m.extraLabelValue != "" { labelValues = append(labelValues, m.extraLabelValue) } ch <- prometheus.MustNewConstMetric( desc, m.metricType, m.value, labelValues..., ) } }