From 0956ff67eab66d7167cc5a553ab5c333b605895d Mon Sep 17 00:00:00 2001
From: tridao
Date: Mon, 3 Jul 2023 19:13:48 -0300
Subject: [PATCH] Create per-access fragmentation metrics

- Keeps track of read ops in the imitator
- The metric is defined as jumps per block read, where a jump is counted
  each time a read has to stop because the next extent is not contiguous.
  For example, a read that touches 8 blocks and has to jump twice scores
  2/8 = 0.25.

Signed-off-by: Tri Dao
---
 .../objectstore/Fragmentation_simulator.cc   | 25 +++++-
 src/test/objectstore/ObjectStoreImitator.cc  | 81 +++++++++++++++++--
 src/test/objectstore/ObjectStoreImitator.h   | 34 +++++---
 3 files changed, 120 insertions(+), 20 deletions(-)

diff --git a/src/test/objectstore/Fragmentation_simulator.cc b/src/test/objectstore/Fragmentation_simulator.cc
index 31ef2ab7f0e..1ad71cac40b 100644
--- a/src/test/objectstore/Fragmentation_simulator.cc
+++ b/src/test/objectstore/Fragmentation_simulator.cc
@@ -96,6 +96,7 @@ int FragmentationSimulator::begin_simulation_with_generators() {
   generators.clear();
   os->print_status();
   os->print_per_object_fragmentation();
+  os->print_per_access_fragmentation();
   return 0;
 }
 
@@ -190,13 +191,17 @@ struct RandomCWGenerator : public FragmentationSimulator::WorkloadGenerator {
     t2.create(ch->get_cid(), obj2);
     tls.emplace_back(std::move(t2));
 
+    os->queue_transactions(ch, tls);
+    os->verify_objects(ch);
+
     gen_type rng(time(0));
     boost::uniform_int<> u_size(0, _1Mb * 4);
     boost::uniform_int<> u_offset(0, _1Mb);
 
     for (unsigned i{0}; i < 200; ++i) {
-      ObjectStore::Transaction t3;
+      tls.clear();
 
+      ObjectStore::Transaction t3;
       auto size = u_size(rng);
       auto offset = u_offset(rng);
 
@@ -204,16 +209,28 @@ struct RandomCWGenerator : public FragmentationSimulator::WorkloadGenerator {
       tls.emplace_back(std::move(t3));
 
       ObjectStore::Transaction t4;
-      size = u_size(rng);
       offset = u_offset(rng);
       t4.write(ch->get_cid(), obj2, offset, size, make_bl(size, 'c'));
       tls.emplace_back(std::move(t4));
+
+      os->queue_transactions(ch, tls);
+      os->verify_objects(ch);
+
+      bufferlist dummy;
+
+      size = u_size(rng);
+      offset = u_offset(rng);
+      os->read(ch, obj1, offset, size, dummy);
+
+      dummy.clear();
+
+      size = u_size(rng);
+      offset = u_offset(rng);
+      os->read(ch, obj2, offset, size, dummy);
     }
 
-    os->queue_transactions(ch, tls);
-    os->verify_objects(ch);
 
     return 0;
   }
 };
 
diff --git a/src/test/objectstore/ObjectStoreImitator.cc b/src/test/objectstore/ObjectStoreImitator.cc
index 42176f1a1e6..f409229c1ec 100644
--- a/src/test/objectstore/ObjectStoreImitator.cc
+++ b/src/test/objectstore/ObjectStoreImitator.cc
@@ -18,7 +18,6 @@
 // ---------- Object -----------
 
 void ObjectStoreImitator::Object::punch_hole(uint64_t offset, uint64_t length,
-                                             uint64_t min_alloc_size,
                                              PExtentVector &old_extents) {
   if (extent_map.empty())
     return;
@@ -207,6 +206,28 @@ void ObjectStoreImitator::print_per_object_fragmentation() {
   }
 }
 
+void ObjectStoreImitator::print_per_access_fragmentation() {
+  for (auto &[_, coll_ref] : coll_map) {
+    for (auto &[id, read_ops] : coll_ref->read_ops) {
+      unsigned blks{0}, jmps{0};
+      for (auto &op : read_ops) {
+        blks += op.blks;
+        jmps += op.jmps;
+      }
+
+      double avg_total_blks = (double)blks / read_ops.size();
+      double avg_jmps = (double)jmps / read_ops.size();
+      double avg_jmps_per_blk = (double)jmps / (double)blks;
+
+      std::cout << "Object: " << id.hobj.oid.name
+                << ", average total blks read: " << avg_total_blks
+                << ", average total jumps: " << avg_jmps
+                << ", average jumps per block: " << avg_jmps_per_blk
+                << std::endl;
+    }
+  }
+}
+
 // ------- Transactions -------
 
 int ObjectStoreImitator::queue_transactions(CollectionHandle &ch,
@@ -497,16 +518,64 @@ void ObjectStoreImitator::_assign_nid(ObjectRef &o) {
 int ObjectStoreImitator::_do_zero(CollectionRef &c, ObjectRef &o,
                                   uint64_t offset, size_t length) {
   PExtentVector old_extents;
-  o->punch_hole(offset, length, min_alloc_size, old_extents);
+  o->punch_hole(offset, length, old_extents);
   alloc->release(old_extents);
   return 0;
 }
 
 int ObjectStoreImitator::_do_read(Collection *c, ObjectRef &o, uint64_t offset,
-                                  size_t len, ceph::buffer::list &bl,
+                                  size_t length, ceph::buffer::list &bl,
                                   uint32_t op_flags, uint64_t retry_count) {
-  auto data = std::string(len, 'a');
+  auto data = std::string(length, 'a');
   bl.append(data);
+
+  // Track read ops to evaluate per-access fragmentation
+  ReadOp op(offset, length);
+  bluestore_pextent_t last_ext;
+  uint64_t end = length + offset;
+
+  auto it = o->extent_map.lower_bound(offset);
+  if ((it == o->extent_map.end() || it->first > offset) &&
+      it != o->extent_map.begin()) {
+    it = std::prev(it);
+
+    auto diff = offset - it->first;
+    if (diff < it->second.length) {
+      // end is past this extent
+      if (end > it->first + it->second.length) {
+        op.blks += div_round_up(it->second.length - diff, min_alloc_size);
+      } else { // end is within this extent, so the whole read length counts
+        op.blks += div_round_up(length, min_alloc_size);
+      }
+
+      last_ext = it->second;
+      it++;
+    }
+  }
+
+  while (it != o->extent_map.end() && it->first < end) {
+    auto extent = it->second;
+    if (last_ext.length > 0 &&
+        last_ext.offset + last_ext.length != extent.offset) {
+      op.jmps++;
+    }
+
+    if (extent.length > length) {
+      op.blks += div_round_up(length, min_alloc_size);
+      break;
+    }
+
+    op.blks += div_round_up(extent.length, min_alloc_size);
+    length -= extent.length;
+    last_ext = extent; // remember this extent so the next iteration can detect a jump
+    it++;
+  }
+
+  c->read_ops[o->oid].push_back(op);
+  // std::cout << "blks: " << op.blks << ", jmps: " << op.jmps
+  //           << ", offset: " << op.offset << ", length: " << op.length
+  //           << std::endl;
+
   return bl.length();
 }
 
@@ -532,7 +600,7 @@ int ObjectStoreImitator::_do_write(CollectionRef &c, ObjectRef &o,
   length = p2align(length, min_alloc_size);
 
   PExtentVector punched;
-  o->punch_hole(offset, length, min_alloc_size, punched);
+  o->punch_hole(offset, length, punched);
   alloc->release(punched);
 
   // all writes will trigger an allocation
@@ -639,7 +707,7 @@ void ObjectStoreImitator::_do_truncate(CollectionRef &c, ObjectRef &o,
     return;
 
   PExtentVector old_extents;
-  o->punch_hole(offset, o->size - offset, min_alloc_size, old_extents);
+  o->punch_hole(offset, o->size - offset, old_extents);
   o->size = offset;
   alloc->release(old_extents);
 }
 
diff --git a/src/test/objectstore/ObjectStoreImitator.h b/src/test/objectstore/ObjectStoreImitator.h
index 719d99dc15c..a98f5cc1c4e 100644
--- a/src/test/objectstore/ObjectStoreImitator.h
+++ b/src/test/objectstore/ObjectStoreImitator.h
@@ -50,18 +50,29 @@ private:
                            uint64_t nid_ = 0, uint64_t size_ = 0)
         : c(c_), oid(oid_), exists(exists_), nid(nid_), size(size_) {}
 
-    void punch_hole(uint64_t offset, uint64_t length, uint64_t min_alloc_size,
+    void punch_hole(uint64_t offset, uint64_t length,
                     PExtentVector &old_extents);
     void verify_extents();
     void append(PExtentVector &ext, uint64_t offset);
     uint64_t ext_length();
   };
-  typedef boost::intrusive_ptr<Object> ObjectRef;
+
+  struct ReadOp {
+    uint64_t offset;
+    uint64_t length;
+    unsigned blks;
+    // number of times the read had to stop iterating over contiguous extents
+    unsigned jmps;
+    ReadOp(uint64_t offset = 0, uint64_t length = 0, unsigned blks = 0,
+           unsigned jmps = 0)
+        : offset(offset), length(length), blks(blks), jmps(jmps) {}
+  };
 
   struct Collection : public CollectionImpl {
     bluestore_cnode_t cnode;
     std::map<ghobject_t, ObjectRef> objects;
+    std::unordered_map<ghobject_t, std::vector<ReadOp>> read_ops;
 
     ceph::shared_mutex lock = ceph::make_shared_mutex(
         "FragmentationSimulator::Collection::lock", true, false);
 
@@ -160,8 +171,6 @@ private:
   int _clone(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo);
   int _clone_range(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo,
                    uint64_t srcoff, uint64_t length, uint64_t dstoff);
-  int read(CollectionHandle &c, const ghobject_t &oid, uint64_t offset,
-           size_t len, ceph::buffer::list &bl, uint32_t op_flags = 0) override;
 
   // Helpers
 
@@ -171,7 +180,6 @@ private:
                  uint32_t fadvise_flags);
   int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl,
                       uint64_t offset, uint64_t length);
-
   void _do_truncate(CollectionRef &c, ObjectRef &o, uint64_t offset);
   int _do_zero(CollectionRef &c, ObjectRef &o, uint64_t offset, size_t length);
   int _do_clone_range(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo,
@@ -210,19 +218,25 @@ public:
   void print_status();
   void verify_objects(CollectionHandle &ch);
 
-  // Generate metrics for per-object fragmentation, defined by:
-  // frag_score = 1 - sum((size proportion of each extents / object size) ^
-  // index of each extent in a vector sorted by descending length).
-  // This should only be called after the generators are finished as it will
-  // attempt to change an object's extents.
+  // Generate metrics for per-object fragmentation (how fragmented each
+  // object's extents are), defined by: frag_score = 1 - sum((extent size /
+  // object size) ^ (index of the extent in a vector sorted by descending
+  // length, plus 1)). This should only be called after the generators are
+  // finished as it will attempt to change an object's extents.
   void print_per_object_fragmentation();
 
+  // Generate metrics for per-access fragmentation, defined as jumps per block
+  // read; a jump is counted whenever a read has to leave a contiguous extent.
+  void print_per_access_fragmentation();
+
   // Overrides
   // This is often not called directly but through queue_transaction
 
   int queue_transactions(CollectionHandle &ch,
                          std::vector<Transaction> &tls,
                          TrackedOpRef op = TrackedOpRef(),
                          ThreadPool::TPHandle *handle = NULL) override;
+  int read(CollectionHandle &c, const ghobject_t &oid, uint64_t offset,
+           size_t len, ceph::buffer::list &bl, uint32_t op_flags = 0) override;
   CollectionHandle open_collection(const coll_t &cid) override;
   CollectionHandle create_new_collection(const coll_t &cid) override;
   void set_collection_commit_queue(const coll_t &cid,