From 47248f35547827dbc12c860e07d29b3b0b8e4974 Mon Sep 17 00:00:00 2001 From: Zhiqiang Wang Date: Fri, 20 Jan 2017 16:26:12 +0800 Subject: [PATCH] common: support tracking slow op Add slow op in op tracker history. Signed-off-by: Zhiqiang Wang --- src/common/TrackedOp.cc | 9 +++++++++ src/common/TrackedOp.h | 14 +++++++++++++- src/common/config_opts.h | 2 ++ src/osd/OSD.cc | 7 +++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index 1b088279308..ee1f4391568 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -34,6 +34,7 @@ void OpHistory::on_shutdown() Mutex::Locker history_lock(ops_history_lock); arrived.clear(); duration.clear(); + slow_op.clear(); shutdown = true; } @@ -44,6 +45,8 @@ void OpHistory::insert(utime_t now, TrackedOpRef op) return; duration.insert(make_pair(op->get_duration(), op)); arrived.insert(make_pair(op->get_initiated(), op)); + if (op->get_duration() >= history_slow_op_threshold) + slow_op.insert(make_pair(op->get_initiated(), op)); cleanup(now); } @@ -64,6 +67,12 @@ void OpHistory::cleanup(utime_t now) duration.begin()->second)); duration.erase(duration.begin()); } + + while (slow_op.size() > history_slow_op_size) { + slow_op.erase(make_pair( + slow_op.begin()->second->get_initiated(), + slow_op.begin()->second)); + } } void OpHistory::dump_ops(utime_t now, Formatter *f) diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index 412f1d324ef..5342e827381 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -33,18 +33,23 @@ typedef boost::intrusive_ptr<TrackedOp> TrackedOpRef; class OpHistory { set<pair<utime_t, TrackedOpRef> > arrived; set<pair<double, TrackedOpRef> > duration; + set<pair<utime_t, TrackedOpRef> > slow_op; Mutex ops_history_lock; void cleanup(utime_t now); bool shutdown; uint32_t history_size; uint32_t history_duration; + uint32_t history_slow_op_size; + double history_slow_op_threshold; /* double, not uint32_t: the option is OPT_DOUBLE and is compared against op->get_duration(); an integer field would truncate fractional thresholds */ public: OpHistory() : ops_history_lock("OpHistory::Lock"), shutdown(false), - history_size(0), history_duration(0) {} + 
history_size(0), history_duration(0), + history_slow_op_size(0), history_slow_op_threshold(0) {} ~OpHistory() { assert(arrived.empty()); assert(duration.empty()); + assert(slow_op.empty()); } void insert(utime_t now, TrackedOpRef op); void dump_ops(utime_t now, Formatter *f); @@ -54,6 +59,10 @@ public: history_size = new_size; history_duration = new_duration; } + void set_slow_op_size_and_threshold(uint32_t new_size, double new_threshold) { + history_slow_op_size = new_size; + history_slow_op_threshold = new_threshold; + } }; struct ShardedTrackingData; @@ -79,6 +88,9 @@ public: void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) { history.set_size_and_duration(new_size, new_duration); } + void set_history_slow_op_size_and_threshold(uint32_t new_size, double new_threshold) { + history.set_slow_op_size_and_threshold(new_size, new_threshold); + } void set_tracking(bool enable) { RWLock::WLocker l(lock); tracking_enabled = enable; diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 836af93ebad..90ac9b89895 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -880,6 +880,8 @@ OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track +OPTION(osd_op_history_slow_op_size, OPT_U32, 20) // Max number of slow ops to track +OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // track the op if over this threshold OPTION(osd_target_transaction_size, OPT_INT, 30) // to adjust various transactions that batch smaller items OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe) OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept 
connections diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 809e8f21e5d..2eb9e4078d5 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1883,6 +1883,8 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_, cct->_conf->osd_op_log_threshold); op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size, cct->_conf->osd_op_history_duration); + op_tracker.set_history_slow_op_size_and_threshold(cct->_conf->osd_op_history_slow_op_size, + cct->_conf->osd_op_history_slow_op_threshold); } OSD::~OSD() @@ -8987,6 +8989,11 @@ void OSD::handle_conf_change(const struct md_config_t *conf, op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size, cct->_conf->osd_op_history_duration); } + if (changed.count("osd_op_history_slow_op_size") || + changed.count("osd_op_history_slow_op_threshold")) { + op_tracker.set_history_slow_op_size_and_threshold(cct->_conf->osd_op_history_slow_op_size, + cct->_conf->osd_op_history_slow_op_threshold); + } if (changed.count("osd_enable_op_tracker")) { op_tracker.set_tracking(cct->_conf->osd_enable_op_tracker); }