mirror of
https://github.com/ceph/ceph
synced 2025-02-21 18:17:42 +00:00
Merge PR #32667 into master
* refs/pull/32667/head:
    mds: track high water mark for purges
    qa: use correct variable for exception debug
    mds: mark purge queue protected members private

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
commit
ad8c2461f8
@ -194,11 +194,10 @@ class TestStrays(CephFSTestCase):
|
|||||||
num_strays = mdc_stats['num_strays']
|
num_strays = mdc_stats['num_strays']
|
||||||
num_strays_purging = pq_stats['pq_executing']
|
num_strays_purging = pq_stats['pq_executing']
|
||||||
num_purge_ops = pq_stats['pq_executing_ops']
|
num_purge_ops = pq_stats['pq_executing_ops']
|
||||||
|
files_high_water = pq_stats['pq_executing_high_water']
|
||||||
|
ops_high_water = pq_stats['pq_executing_ops_high_water']
|
||||||
|
|
||||||
self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops])
|
self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water])
|
||||||
|
|
||||||
files_high_water = max(files_high_water, num_strays_purging)
|
|
||||||
ops_high_water = max(ops_high_water, num_purge_ops)
|
|
||||||
|
|
||||||
total_strays_created = mdc_stats['strays_created']
|
total_strays_created = mdc_stats['strays_created']
|
||||||
total_strays_purged = pq_stats['pq_executed']
|
total_strays_purged = pq_stats['pq_executed']
|
||||||
@ -242,11 +241,18 @@ class TestStrays(CephFSTestCase):
|
|||||||
raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
|
raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
|
||||||
ops_high_water, mds_max_purge_ops
|
ops_high_water, mds_max_purge_ops
|
||||||
))
|
))
|
||||||
|
# The MDS may go over mds_max_purge_ops for some items, like a
|
||||||
|
# heavily fragmented directory. The throttle does not kick in
|
||||||
|
# until *after* we reach or exceed the limit. This is expected
|
||||||
|
# because we don't want to starve the PQ or never purge a
|
||||||
|
# particularly large file/directory.
|
||||||
|
self.assertLessEqual(ops_high_water, mds_max_purge_ops+64)
|
||||||
elif throttle_type == self.FILES_THROTTLE:
|
elif throttle_type == self.FILES_THROTTLE:
|
||||||
if files_high_water < mds_max_purge_files / 2:
|
if files_high_water < mds_max_purge_files / 2:
|
||||||
raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
|
raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
|
||||||
ops_high_water, mds_max_purge_files
|
files_high_water, mds_max_purge_files
|
||||||
))
|
))
|
||||||
|
self.assertLessEqual(files_high_water, mds_max_purge_files)
|
||||||
|
|
||||||
# Sanity check all MDC stray stats
|
# Sanity check all MDC stray stats
|
||||||
stats = self.fs.mds_asok(['perf', 'dump'])
|
stats = self.fs.mds_asok(['perf', 'dump'])
|
||||||
|
@ -114,7 +114,9 @@ void PurgeQueue::create_logger()
|
|||||||
|
|
||||||
pcb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
|
pcb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
|
||||||
pcb.add_u64(l_pq_executing_ops, "pq_executing_ops", "Purge queue ops in flight");
|
pcb.add_u64(l_pq_executing_ops, "pq_executing_ops", "Purge queue ops in flight");
|
||||||
|
pcb.add_u64(l_pq_executing_ops_high_water, "pq_executing_ops_high_water", "Maximum number of executing file purge ops");
|
||||||
pcb.add_u64(l_pq_executing, "pq_executing", "Purge queue tasks in flight");
|
pcb.add_u64(l_pq_executing, "pq_executing", "Purge queue tasks in flight");
|
||||||
|
pcb.add_u64(l_pq_executing_high_water, "pq_executing_high_water", "Maximum number of executing file purges");
|
||||||
pcb.add_u64(l_pq_item_in_journal, "pq_item_in_journal", "Purge item left in journal");
|
pcb.add_u64(l_pq_item_in_journal, "pq_item_in_journal", "Purge item left in journal");
|
||||||
|
|
||||||
logger.reset(pcb.create_perf_counters());
|
logger.reset(pcb.create_perf_counters());
|
||||||
@ -475,9 +477,13 @@ void PurgeQueue::_execute_item(
|
|||||||
|
|
||||||
in_flight[expire_to] = item;
|
in_flight[expire_to] = item;
|
||||||
logger->set(l_pq_executing, in_flight.size());
|
logger->set(l_pq_executing, in_flight.size());
|
||||||
|
files_high_water = std::max(files_high_water, in_flight.size());
|
||||||
|
logger->set(l_pq_executing_high_water, files_high_water);
|
||||||
auto ops = _calculate_ops(item);
|
auto ops = _calculate_ops(item);
|
||||||
ops_in_flight += ops;
|
ops_in_flight += ops;
|
||||||
logger->set(l_pq_executing_ops, ops_in_flight);
|
logger->set(l_pq_executing_ops, ops_in_flight);
|
||||||
|
ops_high_water = std::max(ops_high_water, ops_in_flight);
|
||||||
|
logger->set(l_pq_executing_ops_high_water, ops_high_water);
|
||||||
|
|
||||||
SnapContext nullsnapc;
|
SnapContext nullsnapc;
|
||||||
|
|
||||||
@ -545,8 +551,12 @@ void PurgeQueue::_execute_item(
|
|||||||
"dropping it" << dendl;
|
"dropping it" << dendl;
|
||||||
ops_in_flight -= ops;
|
ops_in_flight -= ops;
|
||||||
logger->set(l_pq_executing_ops, ops_in_flight);
|
logger->set(l_pq_executing_ops, ops_in_flight);
|
||||||
|
ops_high_water = std::max(ops_high_water, ops_in_flight);
|
||||||
|
logger->set(l_pq_executing_ops_high_water, ops_high_water);
|
||||||
in_flight.erase(expire_to);
|
in_flight.erase(expire_to);
|
||||||
logger->set(l_pq_executing, in_flight.size());
|
logger->set(l_pq_executing, in_flight.size());
|
||||||
|
files_high_water = std::max(files_high_water, in_flight.size());
|
||||||
|
logger->set(l_pq_executing_high_water, files_high_water);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ceph_assert(gather.has_subs());
|
ceph_assert(gather.has_subs());
|
||||||
@ -610,11 +620,15 @@ void PurgeQueue::_execute_item_complete(
|
|||||||
|
|
||||||
ops_in_flight -= _calculate_ops(iter->second);
|
ops_in_flight -= _calculate_ops(iter->second);
|
||||||
logger->set(l_pq_executing_ops, ops_in_flight);
|
logger->set(l_pq_executing_ops, ops_in_flight);
|
||||||
|
ops_high_water = std::max(ops_high_water, ops_in_flight);
|
||||||
|
logger->set(l_pq_executing_ops_high_water, ops_high_water);
|
||||||
|
|
||||||
dout(10) << "completed item for ino " << iter->second.ino << dendl;
|
dout(10) << "completed item for ino " << iter->second.ino << dendl;
|
||||||
|
|
||||||
in_flight.erase(iter);
|
in_flight.erase(iter);
|
||||||
logger->set(l_pq_executing, in_flight.size());
|
logger->set(l_pq_executing, in_flight.size());
|
||||||
|
files_high_water = std::max(files_high_water, in_flight.size());
|
||||||
|
logger->set(l_pq_executing_high_water, files_high_water);
|
||||||
dout(10) << "in_flight.size() now " << in_flight.size() << dendl;
|
dout(10) << "in_flight.size() now " << in_flight.size() << dendl;
|
||||||
|
|
||||||
uint64_t write_pos = journaler.get_write_pos();
|
uint64_t write_pos = journaler.get_write_pos();
|
||||||
|
@ -85,7 +85,9 @@ enum {
|
|||||||
|
|
||||||
// How many items have been finished by PurgeQueue
|
// How many items have been finished by PurgeQueue
|
||||||
l_pq_executing_ops,
|
l_pq_executing_ops,
|
||||||
|
l_pq_executing_ops_high_water,
|
||||||
l_pq_executing,
|
l_pq_executing,
|
||||||
|
l_pq_executing_high_water,
|
||||||
l_pq_executed,
|
l_pq_executed,
|
||||||
l_pq_item_in_journal,
|
l_pq_item_in_journal,
|
||||||
l_pq_last
|
l_pq_last
|
||||||
@ -152,7 +154,7 @@ public:
|
|||||||
|
|
||||||
void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map);
|
void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map);
|
||||||
|
|
||||||
protected:
|
private:
|
||||||
uint32_t _calculate_ops(const PurgeItem &item) const;
|
uint32_t _calculate_ops(const PurgeItem &item) const;
|
||||||
|
|
||||||
bool _can_consume();
|
bool _can_consume();
|
||||||
@ -215,5 +217,8 @@ protected:
|
|||||||
std::vector<Context*> waiting_for_recovery;
|
std::vector<Context*> waiting_for_recovery;
|
||||||
|
|
||||||
size_t purge_item_journal_size;
|
size_t purge_item_journal_size;
|
||||||
|
|
||||||
|
uint64_t ops_high_water = 0;
|
||||||
|
uint64_t files_high_water = 0;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user