diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 89e1ae316de..9b50b345adf 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -27,12 +27,10 @@ add_library(crimson-seastore STATIC onode_manager/staged-fltree/super.cc onode_manager/staged-fltree/value.cc onode_manager/staged-fltree/fltree_onode_manager.cc - extentmap_manager.cc - extentmap_manager/btree/extentmap_btree_node_impl.cc - extentmap_manager/btree/btree_extentmap_manager.cc collection_manager.cc collection_manager/flat_collection_manager.cc collection_manager/collection_flat_node.cc + object_data_handler.cc seastore.cc ../../../test/crimson/seastore/test_block.cc ${PROJECT_SOURCE_DIR}/src/os/Transaction.cc diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 81924972eb9..4745b7d14f1 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -6,9 +6,9 @@ // included for get_extent_by_type #include "crimson/os/seastore/collection_manager/collection_flat_node.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" +#include "crimson/os/seastore/object_data_handler.h" #include "crimson/os/seastore/collection_manager/collection_flat_node.h" #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" #include "test/crimson/seastore/test_block.h" @@ -144,16 +144,14 @@ CachedExtentRef Cache::alloc_new_extent_by_type( return alloc_new_extent(t, length); case extent_types_t::ONODE_BLOCK_STAGED: return alloc_new_extent(t, length); - case extent_types_t::EXTMAP_INNER: - return alloc_new_extent(t, length); - case extent_types_t::EXTMAP_LEAF: - return alloc_new_extent(t, length); case extent_types_t::OMAP_INNER: return alloc_new_extent(t, length); case 
extent_types_t::OMAP_LEAF: return alloc_new_extent(t, length); case extent_types_t::COLL_BLOCK: return alloc_new_extent(t, length); + case extent_types_t::OBJECT_DATA_BLOCK: + return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK: return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK_PHYSICAL: @@ -529,16 +527,6 @@ Cache::get_extent_ertr::future Cache::get_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); - case extent_types_t::EXTMAP_INNER: - return get_extent(offset, length - ).safe_then([](auto extent) { - return CachedExtentRef(extent.detach(), false /* add_ref */); - }); - case extent_types_t::EXTMAP_LEAF: - return get_extent(offset, length - ).safe_then([](auto extent) { - return CachedExtentRef(extent.detach(), false /* add_ref */); - }); case extent_types_t::OMAP_INNER: return get_extent(offset, length ).safe_then([](auto extent) { @@ -559,6 +547,11 @@ Cache::get_extent_ertr::future Cache::get_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); + case extent_types_t::OBJECT_DATA_BLOCK: + return get_extent(offset, length + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); case extent_types_t::TEST_BLOCK: return get_extent(offset, length ).safe_then([](auto extent) { diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 3125cc1e2e0..1b0c00f5113 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -173,7 +173,10 @@ public: std::move(ref)); }, get_extent_ertr::pass_further{}, - crimson::ct_error::discard_all{}); + crimson::ct_error::assert_all{ + "Cache::get_extent: invalid error" + } + ); } } diff --git a/src/crimson/os/seastore/collection_manager/flat_collection_manager.cc b/src/crimson/os/seastore/collection_manager/flat_collection_manager.cc index 08cf84ffa33..40c66487788 100644 --- 
a/src/crimson/os/seastore/collection_manager/flat_collection_manager.cc +++ b/src/crimson/os/seastore/collection_manager/flat_collection_manager.cc @@ -47,13 +47,11 @@ FlatCollectionManager::get_coll_root(const coll_root_t &coll_root, Transaction & logger().debug("FlatCollectionManager: {}", __func__); assert(coll_root.get_location() != L_ADDR_NULL); auto cc = get_coll_context(t); - return cc.tm.read_extents( + return cc.tm.read_extent( cc.t, coll_root.get_location(), coll_root.get_size() - ).safe_then([](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); + ).safe_then([](auto&& e) { return get_root_ertr::make_ready_future(std::move(e)); }); } diff --git a/src/crimson/os/seastore/extentmap_manager.cc b/src/crimson/os/seastore/extentmap_manager.cc index 32de3a6edfb..b0dc1b8c8a8 100644 --- a/src/crimson/os/seastore/extentmap_manager.cc +++ b/src/crimson/os/seastore/extentmap_manager.cc @@ -8,7 +8,8 @@ #include "crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h" namespace crimson::os::seastore::extentmap_manager { -ExtentMapManagerRef create_extentmap_manager(TransactionManager &trans_manager) { +ExtentMapManagerRef create_extentmap_manager( + TransactionManager &trans_manager) { return ExtentMapManagerRef(new BtreeExtentMapManager(trans_manager)); } diff --git a/src/crimson/os/seastore/extentmap_manager.h b/src/crimson/os/seastore/extentmap_manager.h deleted file mode 100644 index 7d5223b9481..00000000000 --- a/src/crimson/os/seastore/extentmap_manager.h +++ /dev/null @@ -1,124 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include -#include - -#include -#include - -#include - -#include "crimson/osd/exceptions.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/transaction_manager.h" - -#define PAGE_SIZE 4096 -#define EXTMAP_BLOCK_SIZE 4096 - -namespace crimson::os::seastore { - -struct 
lext_map_val_t { - laddr_t laddr; - extent_len_t length = 0; - - lext_map_val_t( - laddr_t laddr, - extent_len_t length) - : laddr(laddr), length(length) {} - -}; - -class extent_mapping_t -{ -public: - objaddr_t logical_offset = 0; //offset in object - laddr_t laddr; // lextent start address aligned with block size. - extent_len_t length = 0; - explicit extent_mapping_t(objaddr_t lo) : logical_offset(lo) { } - - extent_mapping_t( - objaddr_t lo, - laddr_t laddr, - extent_len_t length) - : logical_offset(lo), laddr(laddr), length(length) {} - - ~extent_mapping_t() {} -}; - -enum class extmap_root_state_t : uint8_t { - INITIAL = 0, - MUTATED = 1, - NONE = 0xFF -}; - -using extent_map_list_t = std::list; -std::ostream &operator<<(std::ostream &out, const extent_mapping_t &rhs); -std::ostream &operator<<(std::ostream &out, const extent_map_list_t &rhs); - -struct extmap_root_t { - depth_t depth = 0; - extmap_root_state_t state; - laddr_t extmap_root_laddr; - extmap_root_t(depth_t dep, laddr_t laddr) - : depth(dep), - extmap_root_laddr(laddr) { state = extmap_root_state_t::INITIAL; } -}; - -/** - * Abstract interface for managing the object inner offset to logical addr mapping - * each onode has an extentmap tree for a particular onode. - */ -class ExtentMapManager { -public: - using initialize_extmap_ertr = TransactionManager::alloc_extent_ertr; - using initialize_extmap_ret = initialize_extmap_ertr::future; - virtual initialize_extmap_ret initialize_extmap(Transaction &t) = 0; - - /* find_lextents - * - * Return a list of all extent_mapping_t overlapping any portion of lo~len. - * or if not find any overlap extent_mapping_t will return the next extent after the range. 
- */ - using find_lextent_ertr = TransactionManager::read_extent_ertr; - using find_lextent_ret = find_lextent_ertr::future; - virtual find_lextent_ret - find_lextent(const extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, extent_len_t len) = 0; - - /* add_lextent - * - * add a new mapping (object offset -> laddr, length) to extent map - * return the added extent_mapping_t - */ - using add_lextent_ertr = TransactionManager::read_extent_ertr; - using add_lextent_ret = add_lextent_ertr::future; - virtual add_lextent_ret - add_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) = 0; - - /* rm_lextent - * - * remove an existing extent mapping from extent map - * return true if the extent mapping is removed, otherwise return false - */ - using rm_lextent_ertr = TransactionManager::read_extent_ertr; - using rm_lextent_ret = rm_lextent_ertr::future; - virtual rm_lextent_ret rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) = 0; - - virtual ~ExtentMapManager() {} -}; -using ExtentMapManagerRef = std::unique_ptr; - -namespace extentmap_manager { -/* creat ExtentMapManager for an extentmap - * if it is a new extmap after create_extentmap_manager need call initialize_extmap - * to initialize the extent map before use it - * if it is an exsiting extmap, needn't initialize_extmap - */ -ExtentMapManagerRef create_extentmap_manager( - TransactionManager &trans_manager); - -} - -} diff --git a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc deleted file mode 100644 index f7609d3e8b5..00000000000 --- a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc +++ /dev/null @@ -1,118 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -#include -#include - -#include "crimson/common/log.h" - -#include "include/buffer.h" -#include 
"crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h" - -namespace { - seastar::logger& logger() { - return crimson::get_logger(ceph_subsys_filestore); - } -} - -namespace crimson::os::seastore::extentmap_manager { - -BtreeExtentMapManager::BtreeExtentMapManager( - TransactionManager &tm) - : tm(tm) {} - -BtreeExtentMapManager::initialize_extmap_ret -BtreeExtentMapManager::initialize_extmap(Transaction &t) -{ - - logger().debug("{}", __func__); - return tm.alloc_extent(t, L_ADDR_MIN, EXTMAP_BLOCK_SIZE) - .safe_then([](auto&& root_extent) { - root_extent->set_size(0); - extmap_node_meta_t meta{1}; - root_extent->set_meta(meta); - extmap_root_t extmap_root = extmap_root_t(1, root_extent->get_laddr()); - return initialize_extmap_ertr::make_ready_future(extmap_root); - }); -} - -BtreeExtentMapManager::get_root_ret -BtreeExtentMapManager::get_extmap_root(const extmap_root_t &extmap_root, Transaction &t) -{ - assert(extmap_root.extmap_root_laddr != L_ADDR_NULL); - laddr_t laddr = extmap_root.extmap_root_laddr; - return extmap_load_extent(get_ext_context(t), laddr, extmap_root.depth); -} - -BtreeExtentMapManager::find_lextent_ret -BtreeExtentMapManager::find_lextent(const extmap_root_t &extmap_root, Transaction &t, - objaddr_t lo, extent_len_t len) -{ - logger().debug("{}: {}, {}", __func__, lo, len); - return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, len](auto&& extent) { - return extent->find_lextent(get_ext_context(t), lo, len); - }).safe_then([](auto &&e) { - logger().debug("{}: found_lextent {}", __func__, e); - return find_lextent_ret( - find_lextent_ertr::ready_future_marker{}, - std::move(e)); - }); - -} - -BtreeExtentMapManager::add_lextent_ret -BtreeExtentMapManager::add_lextent(extmap_root_t &extmap_root, Transaction &t, - objaddr_t lo, lext_map_val_t val) -{ - logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length); - return 
get_extmap_root(extmap_root, t).safe_then([this, &extmap_root, &t, lo, val](auto &&root) { - return insert_lextent(extmap_root, t, root, lo, val); - }).safe_then([](auto ret) { - logger().debug("{}: {}", __func__, ret); - return add_lextent_ret( - add_lextent_ertr::ready_future_marker{}, - std::move(ret)); - }); - -} - -BtreeExtentMapManager::insert_lextent_ret -BtreeExtentMapManager::insert_lextent(extmap_root_t &extmap_root, Transaction &t, - ExtMapNodeRef root, objaddr_t logical_offset, lext_map_val_t val) -{ - auto split = insert_lextent_ertr::make_ready_future(root); - if (root->at_max_capacity()) { - logger().debug("{}::splitting root {}", __func__, *root); - split = root->extmap_alloc_extent(get_ext_context(t), EXTMAP_BLOCK_SIZE) - .safe_then([this, &extmap_root, root, &t, logical_offset](auto&& nroot) { - extmap_node_meta_t meta{root->get_node_meta().depth + 1}; - nroot->set_meta(meta); - nroot->journal_insert(nroot->begin(), OBJ_ADDR_MIN, - root->get_laddr(), nullptr); - extmap_root.extmap_root_laddr = nroot->get_laddr(); - extmap_root.depth = root->get_node_meta().depth + 1; - extmap_root.state = extmap_root_state_t::MUTATED; - return nroot->split_entry(get_ext_context(t), logical_offset, nroot->begin(), root); - }); - } - return split.safe_then([this, &t, logical_offset, val](ExtMapNodeRef node) { - return node->insert(get_ext_context(t), logical_offset, val); - }); -} - -BtreeExtentMapManager::rm_lextent_ret -BtreeExtentMapManager::rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) -{ - logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length); - return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, val](auto extent) { - return extent->rm_lextent(get_ext_context(t), lo, val); - }).safe_then([](auto removed) { - logger().debug("{}: {}", __func__, removed); - return rm_lextent_ret( - rm_lextent_ertr::ready_future_marker{}, - removed); - }); -} - - -} diff --git 
a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h deleted file mode 100644 index db676f41de6..00000000000 --- a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h +++ /dev/null @@ -1,64 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include - -#include -#include -#include - -#include "include/ceph_assert.h" -#include "crimson/osd/exceptions.h" - -#include "crimson/os/seastore/extentmap_manager.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/transaction_manager.h" - -namespace crimson::os::seastore::extentmap_manager { -/** - * BtreeExtentMapManager - * - * Uses a btree to track : - * objaddr_t -> laddr_t mapping for each onode extentmap - */ - -class BtreeExtentMapManager : public ExtentMapManager { - TransactionManager &tm; - - ext_context_t get_ext_context(Transaction &t) { - return ext_context_t{tm,t}; - } - - /* get_extmap_root - * - * load extent map tree root node - */ - using get_root_ertr = TransactionManager::read_extent_ertr; - using get_root_ret = get_root_ertr::future; - get_root_ret get_extmap_root(const extmap_root_t &extmap_root, Transaction &t); - - using insert_lextent_ertr = TransactionManager::read_extent_ertr; - using insert_lextent_ret = insert_lextent_ertr::future; - insert_lextent_ret insert_lextent(extmap_root_t &extmap_root, Transaction &t, - ExtMapNodeRef extent, objaddr_t lo, - lext_map_val_t val); - -public: - explicit BtreeExtentMapManager(TransactionManager &tm); - - initialize_extmap_ret initialize_extmap(Transaction &t) final; - - find_lextent_ret find_lextent(const extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, extent_len_t len) final; - - add_lextent_ret add_lextent(extmap_root_t &extmap_root, Transaction 
&t, objaddr_t lo, lext_map_val_t val) final; - - rm_lextent_ret rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) final; - - -}; -using BtreeExtentMapManagerRef = std::unique_ptr; - -} diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h deleted file mode 100644 index 180760cde2a..00000000000 --- a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h +++ /dev/null @@ -1,143 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - - -#pragma once - -#include - -#include "crimson/common/log.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/transaction_manager.h" -#include "crimson/os/seastore/extentmap_manager.h" - -namespace crimson::os::seastore::extentmap_manager{ - -struct ext_context_t { - TransactionManager &tm; - Transaction &t; -}; - -struct extmap_node_meta_t { - depth_t depth = 0; - - std::pair split_into(objaddr_t pivot) const { - return std::make_pair( - extmap_node_meta_t{depth}, - extmap_node_meta_t{depth}); - } - - static extmap_node_meta_t merge_from( - const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs) { - assert(lhs.depth == rhs.depth); - return extmap_node_meta_t{lhs.depth}; - } - - static std::pair - rebalance(const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs, laddr_t pivot) { - assert(lhs.depth == rhs.depth); - return std::make_pair( - extmap_node_meta_t{lhs.depth}, - extmap_node_meta_t{lhs.depth}); - } -}; - -struct ExtMapNode : LogicalCachedExtent { - using ExtMapNodeRef = TCachedExtentRef; - - ExtMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} - ExtMapNode(const ExtMapNode &other) - : LogicalCachedExtent(other) {} - - using find_lextent_ertr = ExtentMapManager::find_lextent_ertr; - using find_lextent_ret = ExtentMapManager::find_lextent_ret; - virtual 
find_lextent_ret find_lextent(ext_context_t ec, - objaddr_t lo, extent_len_t len) = 0; - - using insert_ertr = TransactionManager::read_extent_ertr; - using insert_ret = insert_ertr::future; - virtual insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0; - - using rm_lextent_ertr = TransactionManager::read_extent_ertr; - using rm_lextent_ret = rm_lextent_ertr::future; - virtual rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0; - - using split_children_ertr = TransactionManager::alloc_extent_ertr; - using split_children_ret = split_children_ertr::future - >; - virtual split_children_ret make_split_children(ext_context_t ec) = 0; - - using full_merge_ertr = TransactionManager::alloc_extent_ertr; - using full_merge_ret = full_merge_ertr::future; - virtual full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) = 0; - - using make_balanced_ertr = TransactionManager::alloc_extent_ertr; - using make_balanced_ret = make_balanced_ertr::future - >; - virtual make_balanced_ret - make_balanced(ext_context_t ec, ExtMapNodeRef right, bool prefer_left) = 0; - - virtual extmap_node_meta_t get_node_meta() const = 0; - - virtual bool at_max_capacity() const = 0; - virtual bool at_min_capacity() const = 0; - virtual unsigned get_node_size() const = 0; - virtual ~ExtMapNode() = default; - - using alloc_ertr = TransactionManager::alloc_extent_ertr; - template - alloc_ertr::future> - extmap_alloc_extent(ext_context_t ec, extent_len_t len) { - return ec.tm.alloc_extent(ec.t, L_ADDR_MIN, len).safe_then( - [](auto&& extent) { - return alloc_ertr::make_ready_future>(std::move(extent)); - }); - } - - template - alloc_ertr::future, TCachedExtentRef>> - extmap_alloc_2extents(ext_context_t ec, extent_len_t len) { - return seastar::do_with(std::pair, TCachedExtentRef>(), - [ec, len] (auto &extents) { - return crimson::do_for_each(boost::make_counting_iterator(0), - boost::make_counting_iterator(2), - [ec, len, &extents] (auto 
i) { - return ec.tm.alloc_extent(ec.t, L_ADDR_MIN, len).safe_then( - [i, &extents](auto &&node) { - if (i == 0) - extents.first = node; - if (i == 1) - extents.second = node; - }); - }).safe_then([&extents] { - return alloc_ertr::make_ready_future - , TCachedExtentRef>>(std::move(extents)); - }); - }); - } - - using retire_ertr = TransactionManager::ref_ertr; - using retire_ret = retire_ertr::future>; - retire_ret - extmap_retire_node(ext_context_t ec, std::list dec_laddrs) { - return seastar::do_with(std::move(dec_laddrs), std::list(), - [ec] (auto &&dec_laddrs, auto &refcnt) { - return crimson::do_for_each(dec_laddrs.begin(), dec_laddrs.end(), - [ec, &refcnt] (auto &laddr) { - return ec.tm.dec_ref(ec.t, laddr).safe_then([&refcnt] (auto ref) { - refcnt.push_back(ref); - }); - }).safe_then([&refcnt] { - return retire_ertr::make_ready_future>(std::move(refcnt)); - }); - }); - } - -}; - -using ExtMapNodeRef = ExtMapNode::ExtMapNodeRef; - -TransactionManager::read_extent_ertr::future -extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth); - -} diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc deleted file mode 100644 index 7bf8680a532..00000000000 --- a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc +++ /dev/null @@ -1,373 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include - -#include -#include - -#include "include/buffer.h" -#include "include/byteorder.h" -#include "crimson/os/seastore/transaction_manager.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h" - -namespace { - seastar::logger& logger() { - return crimson::get_logger(ceph_subsys_filestore); - } -} - -namespace crimson::os::seastore::extentmap_manager { - 
-std::ostream &ExtMapInnerNode::print_detail_l(std::ostream &out) const -{ - return out << ", size=" << get_size() - << ", depth=" << get_meta().depth; -} - -ExtMapInnerNode::find_lextent_ret -ExtMapInnerNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) -{ - auto [begin, end] = bound(lo, lo + len); - auto result_up = std::make_unique(); - auto &result = *result_up; - return crimson::do_for_each( - std::move(begin), - std::move(end), - [this, ec, &result, lo, len](const auto &val) mutable { - return extmap_load_extent(ec, val.get_val(), get_meta().depth - 1).safe_then( - [ec, &result, lo, len](auto extent) mutable { - return extent->find_lextent(ec, lo, len).safe_then( - [&result](auto item_list) mutable { - result.splice(result.end(), item_list, - item_list.begin(), item_list.end()); - }); - }); - }).safe_then([result=std::move(result_up)] { - return find_lextent_ret( - find_lextent_ertr::ready_future_marker{}, - std::move(*result)); - }); -} - -ExtMapInnerNode::insert_ret -ExtMapInnerNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) -{ - auto insertion_pt = get_containing_child(lo); - assert(insertion_pt != end()); - return extmap_load_extent(ec, insertion_pt->get_val(), get_meta().depth - 1).safe_then( - [this, ec, insertion_pt, lo, val=std::move(val)](auto extent) mutable { - return extent->at_max_capacity() ? 
- split_entry(ec, lo, insertion_pt, extent) : - insert_ertr::make_ready_future(std::move(extent)); - }).safe_then([ec, lo, val=std::move(val)](ExtMapNodeRef extent) mutable { - return extent->insert(ec, lo, val); - }); -} - -ExtMapInnerNode::rm_lextent_ret -ExtMapInnerNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) -{ - auto rm_pt = get_containing_child(lo); - return extmap_load_extent(ec, rm_pt->get_val(), get_meta().depth - 1).safe_then( - [this, ec, rm_pt, lo, val=std::move(val)](auto extent) mutable { - if (extent->at_min_capacity() && get_node_size() > 1) { - return merge_entry(ec, lo, rm_pt, extent); - } else { - return merge_entry_ertr::make_ready_future(std::move(extent)); - } - }).safe_then([ec, lo, val](ExtMapNodeRef extent) mutable { - return extent->rm_lextent(ec, lo, val); - }); -} - -ExtMapInnerNode::split_children_ret -ExtMapInnerNode::make_split_children(ext_context_t ec) -{ - logger().debug("{}: {}", "ExtMapInnerNode", __func__); - return extmap_alloc_2extents(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this] (auto &&ext_pair) { - auto [left, right] = ext_pair; - return split_children_ret( - split_children_ertr::ready_future_marker{}, - std::make_tuple(left, right, split_into(*left, *right))); - }); -} - -ExtMapInnerNode::full_merge_ret -ExtMapInnerNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right) -{ - logger().debug("{}: {}", "ExtMapInnerNode", __func__); - return extmap_alloc_extent(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this, right] (auto &&replacement) { - replacement->merge_from(*this, *right->cast()); - return full_merge_ret( - full_merge_ertr::ready_future_marker{}, - std::move(replacement)); - }); -} - -ExtMapInnerNode::make_balanced_ret -ExtMapInnerNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) -{ - logger().debug("{}: {}", "ExtMapInnerNode", __func__); - ceph_assert(_right->get_type() == type); - return extmap_alloc_2extents(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this, _right, 
prefer_left] (auto &&replacement_pair){ - auto [replacement_left, replacement_right] = replacement_pair; - auto &right = *_right->cast(); - return make_balanced_ret( - make_balanced_ertr::ready_future_marker{}, - std::make_tuple(replacement_left, replacement_right, - balance_into_new_nodes(*this, right, prefer_left, - *replacement_left, *replacement_right))); - }); -} - -ExtMapInnerNode::split_entry_ret -ExtMapInnerNode::split_entry(ext_context_t ec, objaddr_t lo, - internal_iterator_t iter, ExtMapNodeRef entry) -{ - logger().debug("{}: {}", "ExtMapInnerNode", __func__); - if (!is_pending()) { - auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast(); - auto mut_iter = mut->iter_idx(iter->get_offset()); - return mut->split_entry(ec, lo, mut_iter, entry); - } - ceph_assert(!at_max_capacity()); - return entry->make_split_children(ec) - .safe_then([this, ec, lo, iter, entry] (auto tuple){ - auto [left, right, pivot] = tuple; - journal_update(iter, left->get_laddr(), maybe_get_delta_buffer()); - journal_insert(iter + 1, pivot, right->get_laddr(), maybe_get_delta_buffer()); - logger().debug( - "ExtMapInnerNode::split_entry *this {} entry {} into left {} right {}", - *this, *entry, *left, *right); - //retire extent - return ec.tm.dec_ref(ec.t, entry->get_laddr()) - .safe_then([lo, left = left, right = right, pivot = pivot] (auto ret) { - return split_entry_ertr::make_ready_future( - pivot > lo ? left : right); - }); - }); -} - -ExtMapInnerNode::merge_entry_ret -ExtMapInnerNode::merge_entry(ext_context_t ec, objaddr_t lo, - internal_iterator_t iter, ExtMapNodeRef entry) -{ - if (!is_pending()) { - auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast(); - auto mut_iter = mut->iter_idx(iter->get_offset()); - return mut->merge_entry(ec, lo, mut_iter, entry); - } - logger().debug("ExtMapInnerNode: merge_entry: {}, {}", *this, *entry); - auto is_left = (iter + 1) == end(); - auto donor_iter = is_left ? 
iter - 1 : iter + 1; - return extmap_load_extent(ec, donor_iter->get_val(), get_meta().depth - 1) - .safe_then([this, ec, lo, iter, entry, donor_iter, is_left] - (auto &&donor) mutable { - auto [l, r] = is_left ? - std::make_pair(donor, entry) : std::make_pair(entry, donor); - auto [liter, riter] = is_left ? - std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter); - if (donor->at_min_capacity()) { - return l->make_full_merge(ec, r) - .safe_then([this, ec, entry, l = l, r = r, liter = liter, riter = riter] - (auto &&replacement){ - journal_update(liter, replacement->get_laddr(), maybe_get_delta_buffer()); - journal_remove(riter, maybe_get_delta_buffer()); - //retire extent - std::list dec_laddrs; - dec_laddrs.push_back(l->get_laddr()); - dec_laddrs.push_back(r->get_laddr()); - return extmap_retire_node(ec, dec_laddrs) - .safe_then([replacement] (auto &&ret) { - return merge_entry_ertr::make_ready_future(replacement); - }); - }); - } else { - logger().debug("ExtMapInnerNode::merge_entry balanced l {} r {}", - *l, *r); - return l->make_balanced(ec, r, !is_left) - .safe_then([this, ec, lo, entry, l = l, r = r, liter = liter, riter = riter] - (auto tuple) { - auto [replacement_l, replacement_r, pivot] = tuple; - journal_update(liter, replacement_l->get_laddr(), maybe_get_delta_buffer()); - journal_replace(riter, pivot, replacement_r->get_laddr(), - maybe_get_delta_buffer()); - // retire extent - std::list dec_laddrs; - dec_laddrs.push_back(l->get_laddr()); - dec_laddrs.push_back(r->get_laddr()); - return extmap_retire_node(ec, dec_laddrs) - .safe_then([lo, pivot = pivot, replacement_l = replacement_l, replacement_r = replacement_r] - (auto &&ret) { - return merge_entry_ertr::make_ready_future( - lo >= pivot ? 
replacement_r : replacement_l); - }); - }); - } - }); -} - - -ExtMapInnerNode::internal_iterator_t -ExtMapInnerNode::get_containing_child(objaddr_t lo) -{ - // TODO: binary search - for (auto i = begin(); i != end(); ++i) { - if (i.contains(lo)) - return i; - } - ceph_assert(0 == "invalid"); - return end(); -} - -std::ostream &ExtMapLeafNode::print_detail_l(std::ostream &out) const -{ - return out << ", size=" << get_size() - << ", depth=" << get_meta().depth; -} - -ExtMapLeafNode::find_lextent_ret -ExtMapLeafNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) -{ - logger().debug( - "ExtMapLeafNode::find_lextent {}~{}", lo, len); - auto ret = extent_map_list_t(); - auto [from, to] = get_leaf_entries(lo, len); - if (from == to && to != end()) - ++to; - for (; from != to; ++from) { - auto val = (*from).get_val(); - ret.emplace_back( - extent_mapping_t( - (*from).get_key(), - val.laddr, - val.length)); - logger().debug("ExtMapLeafNode::find_lextent find {}~{}", lo, val.laddr); - } - return find_lextent_ertr::make_ready_future( - std::move(ret)); -} - -ExtMapLeafNode::insert_ret -ExtMapLeafNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) -{ - ceph_assert(!at_max_capacity()); - if (!is_pending()) { - auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast(); - return mut->insert(ec, lo, val); - } - auto insert_pt = lower_bound(lo); - journal_insert(insert_pt, lo, val, maybe_get_delta_buffer()); - - logger().debug( - "ExtMapLeafNode::insert: inserted {}->{} {}", - insert_pt.get_key(), - insert_pt.get_val().laddr, - insert_pt.get_val().length); - return insert_ertr::make_ready_future( - extent_mapping_t(lo, val.laddr, val.length)); -} - -ExtMapLeafNode::rm_lextent_ret -ExtMapLeafNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) -{ - if (!is_pending()) { - auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast(); - return mut->rm_lextent(ec, lo, val); - } - - auto [rm_pt, rm_end] = get_leaf_entries(lo, val.length); - if (lo 
== rm_pt->get_key() && val.laddr == rm_pt->get_val().laddr - && val.length == rm_pt->get_val().length) { - journal_remove(rm_pt, maybe_get_delta_buffer()); - logger().debug( - "ExtMapLeafNode::rm_lextent: removed {}->{} {}", - rm_pt.get_key(), - rm_pt.get_val().laddr, - rm_pt.get_val().length); - return rm_lextent_ertr::make_ready_future(true); - } else { - return rm_lextent_ertr::make_ready_future(false); - } -} - -ExtMapLeafNode::split_children_ret -ExtMapLeafNode::make_split_children(ext_context_t ec) -{ - logger().debug("{}: {}", "ExtMapLeafNode", __func__); - return extmap_alloc_2extents(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this] (auto &&ext_pair) { - auto [left, right] = ext_pair; - return split_children_ret( - split_children_ertr::ready_future_marker{}, - std::make_tuple(left, right, split_into(*left, *right))); - }); -} - -ExtMapLeafNode::full_merge_ret -ExtMapLeafNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right) -{ - logger().debug("{}: {}", "ExtMapLeafNode", __func__); - return extmap_alloc_extent(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this, right] (auto &&replacement) { - replacement->merge_from(*this, *right->cast()); - return full_merge_ret( - full_merge_ertr::ready_future_marker{}, - std::move(replacement)); - }); -} -ExtMapLeafNode::make_balanced_ret -ExtMapLeafNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) -{ - logger().debug("{}: {}", "ExtMapLeafNode", __func__); - ceph_assert(_right->get_type() == type); - return extmap_alloc_2extents(ec, EXTMAP_BLOCK_SIZE) - .safe_then([this, _right, prefer_left] (auto &&replacement_pair) { - auto [replacement_left, replacement_right] = replacement_pair; - auto &right = *_right->cast(); - return make_balanced_ret( - make_balanced_ertr::ready_future_marker{}, - std::make_tuple( - replacement_left, replacement_right, - balance_into_new_nodes( - *this, right, prefer_left, - *replacement_left, *replacement_right))); - }); -} - - -std::pair 
-ExtMapLeafNode::get_leaf_entries(objaddr_t addr, extent_len_t len) -{ - return bound(addr, addr + len); -} - - -TransactionManager::read_extent_ertr::future -extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth) -{ - ceph_assert(depth > 0); - if (depth > 1) { - return ec.tm.read_extents(ec.t, laddr, EXTMAP_BLOCK_SIZE).safe_then( - [](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); - return TransactionManager::read_extent_ertr::make_ready_future(std::move(e)); - }); - } else { - return ec.tm.read_extents(ec.t, laddr, EXTMAP_BLOCK_SIZE).safe_then( - [](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); - return TransactionManager::read_extent_ertr::make_ready_future(std::move(e)); - }); - } -} - -} diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h deleted file mode 100644 index 94a5dd78849..00000000000 --- a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h +++ /dev/null @@ -1,281 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once -#include "include/buffer.h" - -#include "crimson/common/fixed_kv_node_layout.h" -#include "crimson/common/errorator.h" -#include "crimson/os/seastore/extentmap_manager.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h" - -namespace crimson::os::seastore::extentmap_manager { - -struct extmap_node_meta_le_t { - depth_le_t depth = init_depth_le(0); - - extmap_node_meta_le_t() = default; - extmap_node_meta_le_t(const extmap_node_meta_le_t &) = default; - explicit extmap_node_meta_le_t(const extmap_node_meta_t &val) - : depth(init_depth_le(val.depth)) {} - - operator extmap_node_meta_t() const { - return extmap_node_meta_t{ depth }; - 
} -}; - -/** - * ExtMapInnerNode - * - * Abstracts operations on and layout of internal nodes for the - * Extentmap Tree. - * - * Layout (4k): - * num_entries: uint32_t 4b - * meta : depth 4b - * (padding) : 8b - * keys : objaddr_t[340] (340*4)b - * values : laddr_t[340] (340*8)b - * = 4096 - */ -constexpr size_t INNER_NODE_CAPACITY = - (EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t)) - / (sizeof (objaddr_t) + sizeof(laddr_t)); - -struct ExtMapInnerNode - : ExtMapNode, - common::FixedKVNodeLayout< - INNER_NODE_CAPACITY, - extmap_node_meta_t, extmap_node_meta_le_t, - objaddr_t, ceph_le32, - laddr_t, laddr_le_t> { - using internal_iterator_t = const_iterator; - template - ExtMapInnerNode(T&&... t) : - ExtMapNode(std::forward(t)...), - FixedKVNodeLayout(get_bptr().c_str()) {} - - static constexpr extent_types_t type = extent_types_t::EXTMAP_INNER; - - extmap_node_meta_t get_node_meta() const final {return get_meta();} - - CachedExtentRef duplicate_for_write() final { - assert(delta_buffer.empty()); - return CachedExtentRef(new ExtMapInnerNode(*this)); - }; - - delta_buffer_t delta_buffer; - delta_buffer_t *maybe_get_delta_buffer() { - return is_mutation_pending() ? 
&delta_buffer : nullptr; - } - - find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final; - - insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final; - - rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final; - - split_children_ret make_split_children(ext_context_t ec) final; - - full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final; - - make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final; - - std::ostream &print_detail_l(std::ostream &out) const final; - - extent_types_t get_type() const final { - return type; - } - - ceph::bufferlist get_delta() final { - assert(!delta_buffer.empty()); - ceph::buffer::ptr bptr(delta_buffer.get_bytes()); - delta_buffer.copy_out(bptr.c_str(), bptr.length()); - ceph::bufferlist bl; - bl.push_back(bptr); - return bl; - } - - void apply_delta(const ceph::bufferlist &_bl) final { - assert(_bl.length()); - ceph::bufferlist bl = _bl; - bl.rebuild(); - delta_buffer_t buffer; - buffer.copy_in(bl.front().c_str(), bl.front().length()); - buffer.replay(*this); - } - - bool at_max_capacity() const final { - return get_size() == get_capacity(); - } - - bool at_min_capacity() const { - return get_size() == get_capacity() / 2; - } - - unsigned get_node_size() const { - return get_size(); - } - - /* get the iterator containing [l, r] - */ - std::pair bound( - objaddr_t l, objaddr_t r) { - auto retl = begin(); - for (; retl != end(); ++retl) { - if (retl->get_next_key_or_max() > l) - break; - } - auto retr = retl; - for (; retr != end(); ++retr) { - if (retr->get_key() >= r) - break; - } - return {retl, retr}; - } - - using split_entry_ertr = TransactionManager::read_extent_ertr; - using split_entry_ret = split_entry_ertr::future; - split_entry_ret split_entry(ext_context_t ec, objaddr_t lo, - internal_iterator_t, ExtMapNodeRef entry); - using merge_entry_ertr = TransactionManager::read_extent_ertr; - 
using merge_entry_ret = merge_entry_ertr::future; - merge_entry_ret merge_entry(ext_context_t ec, objaddr_t lo, - internal_iterator_t iter, ExtMapNodeRef entry); - internal_iterator_t get_containing_child(objaddr_t lo); - -}; - -/** - * ExtMapLeafNode - * - * Abstracts operations on and layout of leaf nodes for the - * ExtentMap Tree. - * - * Layout (4k): - * num_entries: uint32_t 4b - * meta : depth 4b - * (padding) : 8b - * keys : objaddr_t[204] (204*4)b - * values : lext_map_val_t[204] (204*16)b - * = 4096 - */ -constexpr size_t LEAF_NODE_CAPACITY = - (EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t)) - / (sizeof(objaddr_t) + sizeof(lext_map_val_t)); - -struct lext_map_val_le_t { - laddr_le_t laddr; - extent_len_le_t length = init_extent_len_le(0); - - lext_map_val_le_t() = default; - lext_map_val_le_t(const lext_map_val_le_t &) = default; - explicit lext_map_val_le_t(const lext_map_val_t &val) - : laddr(laddr_le_t(val.laddr)), - length(init_extent_len_le(val.length)) {} - - operator lext_map_val_t() const { - return lext_map_val_t{laddr, length}; - } -}; - -struct ExtMapLeafNode - : ExtMapNode, - common::FixedKVNodeLayout< - LEAF_NODE_CAPACITY, - extmap_node_meta_t, extmap_node_meta_le_t, - objaddr_t, ceph_le32, - lext_map_val_t, lext_map_val_le_t> { - using internal_iterator_t = const_iterator; - template - ExtMapLeafNode(T&&... t) : - ExtMapNode(std::forward(t)...), - FixedKVNodeLayout(get_bptr().c_str()) {} - - static constexpr extent_types_t type = extent_types_t::EXTMAP_LEAF; - - extmap_node_meta_t get_node_meta() const final { return get_meta(); } - - CachedExtentRef duplicate_for_write() final { - assert(delta_buffer.empty()); - return CachedExtentRef(new ExtMapLeafNode(*this)); - }; - - delta_buffer_t delta_buffer; - delta_buffer_t *maybe_get_delta_buffer() { - return is_mutation_pending() ? 
&delta_buffer : nullptr; - } - - find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final; - - insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final; - - rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final; - - split_children_ret make_split_children(ext_context_t ec) final; - - full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final; - - make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final; - - extent_types_t get_type() const final { - return type; - } - - ceph::bufferlist get_delta() final { - assert(!delta_buffer.empty()); - ceph::buffer::ptr bptr(delta_buffer.get_bytes()); - delta_buffer.copy_out(bptr.c_str(), bptr.length()); - ceph::bufferlist bl; - bl.push_back(bptr); - return bl; - } - - void apply_delta(const ceph::bufferlist &_bl) final { - assert(_bl.length()); - ceph::bufferlist bl = _bl; - bl.rebuild(); - delta_buffer_t buffer; - buffer.copy_in(bl.front().c_str(), bl.front().length()); - buffer.replay(*this); - } - - std::ostream &print_detail_l(std::ostream &out) const final; - - bool at_max_capacity() const final { - return get_size() == get_capacity(); - } - - bool at_min_capacity() const final { - return get_size() == get_capacity() / 2; - } - - unsigned get_node_size() const { - return get_size(); - } - - /* get the iterator containing [l, r] - */ - std::pair bound( - objaddr_t l, objaddr_t r) { - auto retl = begin(); - for (; retl != end(); ++retl) { - if (retl->get_key() >= l || (retl->get_key() + retl->get_val().length) > l) - break; - } - auto retr = retl; - for (; retr != end(); ++retr) { - if (retr->get_key() >= r) - break; - } - return {retl, retr}; - } - - std::pair - get_leaf_entries(objaddr_t lo, extent_len_t len); - -}; -using ExtentMapLeafNodeRef = TCachedExtentRef; - -} diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index 26646ff0546..864637df198 
100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -59,6 +59,18 @@ public: Transaction &t, laddr_list_t &&extent_lisk) = 0; + /** + * Finds unmapped laddr extent of len len + */ + using find_hole_ertr = base_ertr; + using find_hole_ret = find_hole_ertr::future< + std::pair + >; + virtual find_hole_ret find_hole( + Transaction &t, + laddr_t hint, + extent_len_t) = 0; + /** * Allocates a new mapping referenced by LBARef * diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index aa9fea07904..bbbf0a90f1c 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -102,6 +102,26 @@ BtreeLBAManager::get_mappings( }); } +BtreeLBAManager::find_hole_ret +BtreeLBAManager::find_hole( + Transaction &t, + laddr_t hint, + extent_len_t len) +{ + return get_root(t + ).safe_then([this, hint, len, &t](auto extent) { + return extent->find_hole( + get_context(t), + hint, + L_ADDR_MAX, + len); + }).safe_then([len](auto addr) { + return seastar::make_ready_future>( + addr, len); + }); + +} + BtreeLBAManager::alloc_extent_ret BtreeLBAManager::alloc_extent( Transaction &t, diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index 76fa2d19448..34fa3280957 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -58,6 +58,11 @@ public: Transaction &t, laddr_list_t &&list) final; + find_hole_ret find_hole( + Transaction &t, + laddr_t hint, + extent_len_t) final; + alloc_extent_ret alloc_extent( Transaction &t, laddr_t hint, diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h index ba3d4dbd9b9..d4529ae48ec 100644 --- 
a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h @@ -263,6 +263,7 @@ public: auto ret = std::unique_ptr(new BtreeLBAPin); ret->pin.set_range(pin.range); ret->paddr = paddr; + ret->parent = parent; return ret; } diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc new file mode 100644 index 00000000000..59f2f5462a9 --- /dev/null +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -0,0 +1,536 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/object_data_handler.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore { + +/** + * MAX_OBJECT_SIZE + * + * For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE + * for any object. In the future, once we have the ability to remap logical + * mappings (necessary for clone), we'll add the ability to grow and shrink + * these regions and remove this assumption. + */ +static constexpr extent_len_t MAX_OBJECT_SIZE = 16<<20; +#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0) + +using context_t = ObjectDataHandler::context_t; +using get_ertr = ObjectDataHandler::write_ertr; + +auto read_pin( + context_t ctx, + LBAPinRef pin) { + return ctx.tm.pin_to_extent( + ctx.t, + std::move(pin) + ).handle_error( + get_ertr::pass_further{}, + crimson::ct_error::assert_all{ "read_pin: invalid error" } + ); +} + +/** + * extent_to_write_t + * + * Encapsulates extents to be written out using do_insertions. + * Indicates a zero extent or a data extent based on whether + * to_write is populate. 
+ */
+struct extent_to_write_t {
+  laddr_t addr = L_ADDR_NULL;
+  extent_len_t len = 0;                // zero-init: the defaulted ctor must not leave len indeterminate
+  std::optional<bufferlist> to_write;  // engaged => data extent; empty => zero extent
+
+  extent_to_write_t() = default;
+  extent_to_write_t(const extent_to_write_t &) = default;
+  extent_to_write_t(extent_to_write_t &&) = default;
+
+  extent_to_write_t(laddr_t addr, bufferlist to_write)  // data extent: len is taken from the buffer
+    : addr(addr), len(to_write.length()), to_write(to_write) {}
+
+  extent_to_write_t(laddr_t addr, extent_len_t len)  // zero extent: to_write stays empty
+    : addr(addr), len(len) {}
+};
+using extent_to_write_list_t = std::list<extent_to_write_t>;
+
+/// Removes extents/mappings in pins
+ObjectDataHandler::write_ret do_removals(
+  context_t ctx,
+  lba_pin_list_t &pins)
+{
+  return crimson::do_for_each(
+    pins.begin(),
+    pins.end(),
+    [ctx](auto &pin) {
+      return ctx.tm.dec_ref(
+        ctx.t,
+        pin->get_laddr()
+      ).safe_then(
+        [](auto){},  // the value produced by dec_ref is deliberately ignored
+        ObjectDataHandler::write_ertr::pass_further{},
+        crimson::ct_error::assert_all{
+          "object_data_handler::do_removals invalid error"
+        }
+      );
+    });
+}
+
+/// Creates zero/data extents in to_write
+ObjectDataHandler::write_ret do_insertions(
+  context_t ctx,
+  extent_to_write_list_t &to_write)
+{
+  return crimson::do_for_each(
+    to_write.begin(),
+    to_write.end(),
+    [ctx](auto &region) {
+      if (region.to_write) {  // data extent: allocate at region.addr and copy the payload in
+        assert_aligned(region.addr);
+        assert_aligned(region.len);
+        ceph_assert(region.len == region.to_write->length());
+        return ctx.tm.alloc_extent<ObjectDataBlock>(  // NOTE(review): template argument was lost in extraction; ObjectDataBlock assumed -- confirm
+          ctx.t,
+          region.addr,
+          region.len
+        ).safe_then([ctx, &region](auto extent) {
+          if (extent->get_laddr() != region.addr) {  // log before the assert below fires
+            logger().debug(
+              "object_data_handler::do_insertions alloc got addr {},"
+              " should have been {}",
+              extent->get_laddr(),
+              region.addr);
+          }
+          ceph_assert(extent->get_laddr() == region.addr);
+          ceph_assert(extent->get_length() == region.len);
+          auto iter = region.to_write->cbegin();
+          iter.copy(region.len, extent->get_bptr().c_str());
+          return ObjectDataHandler::write_ertr::now();
+        });
+      } else {  // zero extent: only reserve the laddr range
+        return ctx.tm.reserve_region(
+          ctx.t,
+          region.addr,
+          region.len
+        ).safe_then([&region](auto pin) {
+
ceph_assert(pin->get_length() == region.len); + ceph_assert(pin->get_laddr() == region.addr); + return ObjectDataHandler::write_ertr::now(); + }); + } + }); +} + +/** + * split_pin_left + * + * Splits the passed pin returning aligned extent to be rewritten + * to the left (if a zero extent), tail to be prepended to write + * beginning at offset. See below for details. + */ +using split_ret_bare = std::pair< + std::optional, + std::optional>; +using split_ret = get_ertr::future; +split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset) +{ + const auto pin_offset = pin->get_laddr(); + assert_aligned(pin_offset); + ceph_assert(offset >= pin_offset); + if (offset == pin_offset) { + // Aligned, no tail and no extra extent + return get_ertr::make_ready_future( + std::nullopt, + std::nullopt); + } else if (pin->get_paddr().is_zero()) { + /* Zero extent unaligned, return largest aligned zero extent to + * the left and the gap between aligned_offset and offset to prepend. */ + auto aligned_offset = p2align(offset, (uint64_t)ctx.tm.get_block_size()); + assert_aligned(aligned_offset); + ceph_assert(aligned_offset <= offset); + auto zero_extent_len = aligned_offset - pin_offset; + assert_aligned(zero_extent_len); + auto zero_prepend_len = offset - aligned_offset; + return get_ertr::make_ready_future( + (zero_extent_len == 0 + ? 
std::nullopt + : std::make_optional(extent_to_write_t(pin_offset, zero_extent_len))), + bufferptr(ceph::buffer::create(zero_prepend_len, 0)) + ); + } else { + // Data, return up to offset to prepend + auto to_prepend = offset - pin->get_laddr(); + return read_pin(ctx, pin->duplicate() + ).safe_then([to_prepend](auto extent) { + return get_ertr::make_ready_future( + std::nullopt, + bufferptr(extent->get_bptr(), 0, to_prepend)); + }); + } +}; + +/// Reverse of split_pin_left +split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end) +{ + const auto pin_begin = pin->get_laddr(); + const auto pin_end = pin->get_laddr() + pin->get_length(); + assert_aligned(pin_end); + ceph_assert(pin_end >= end); + if (end == pin_end) { + return get_ertr::make_ready_future( + std::nullopt, + std::nullopt); + } else if (pin->get_paddr().is_zero()) { + auto aligned_end = p2roundup(end, (uint64_t)ctx.tm.get_block_size()); + assert_aligned(aligned_end); + ceph_assert(aligned_end >= end); + auto zero_suffix_len = aligned_end - end; + auto zero_extent_len = pin_end - aligned_end; + assert_aligned(zero_extent_len); + return get_ertr::make_ready_future( + (zero_extent_len == 0 + ? 
std::nullopt + : std::make_optional(extent_to_write_t(aligned_end, zero_extent_len))), + bufferptr(ceph::buffer::create(zero_suffix_len, 0)) + ); + } else { + return read_pin(ctx, pin->duplicate() + ).safe_then([end, pin_begin, pin_end](auto extent) { + return get_ertr::make_ready_future( + std::nullopt, + bufferptr( + extent->get_bptr(), + end - pin_begin, + pin_end - end)); + }); + } +}; + +template +auto with_object_data( + ObjectDataHandler::context_t ctx, + F &&f) +{ + return seastar::do_with( + ctx.onode.get_layout().object_data.get(), + std::forward(f), + [ctx](auto &object_data, auto &f) { + return std::invoke(f, object_data + ).safe_then([ctx, &object_data] { + if (object_data.must_update()) { + ctx.onode.get_mutable_layout(ctx.t).object_data.update(object_data); + } + return seastar::now(); + }); + }); +} + +ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation( + context_t ctx, + object_data_t &object_data, + extent_len_t size) +{ + ceph_assert(size <= MAX_OBJECT_SIZE); + if (!object_data.is_null()) { + ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE); + return write_ertr::now(); + } else { + return ctx.tm.reserve_region( + ctx.t, + 0 /* TODO -- pass hint based on object hash */, + MAX_OBJECT_SIZE + ).safe_then([size, &object_data](auto pin) { + ceph_assert(pin->get_length() == MAX_OBJECT_SIZE); + object_data.update_reserved( + pin->get_laddr(), + pin->get_length()); + return write_ertr::now(); + }); + } +} + +ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation( + context_t ctx, object_data_t &object_data, extent_len_t size) +{ + ceph_assert(!object_data.is_null()); + assert_aligned(size); + ceph_assert(size <= object_data.get_reserved_data_len()); + return seastar::do_with( + lba_pin_list_t(), + extent_to_write_list_t(), + [this, ctx, size, &object_data](auto &pins, auto &to_write) { + return ctx.tm.get_pins( + ctx.t, + object_data.get_reserved_data_base() + size, + 
object_data.get_reserved_data_len() - size + ).safe_then([this, ctx, size, &pins, &object_data, &to_write](auto _pins) { + _pins.swap(pins); + ceph_assert(pins.size()); + auto &pin = *pins.front(); + ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base()); + ceph_assert( + pin.get_laddr() <= object_data.get_reserved_data_base() + size); + auto pin_offset = pin.get_laddr() - + object_data.get_reserved_data_base(); + if (pin.get_paddr().is_zero()) { + to_write.emplace_back( + pin.get_laddr(), + object_data.get_reserved_data_len() - pin_offset); + return clear_ertr::now(); + } else { + return read_pin( + ctx, + pin.duplicate() + ).safe_then([ctx, size, pin_offset, &pin, &object_data, &to_write]( + auto extent) { + bufferlist bl; + bl.append( + bufferptr( + extent->get_bptr(), + 0, + size - pin_offset + )); + to_write.emplace_back( + pin.get_laddr(), + bl); + to_write.emplace_back( + object_data.get_reserved_data_base() + size, + object_data.get_reserved_data_len() - size); + return clear_ertr::now(); + }); + } + }).safe_then([ctx, size, &pins] { + return do_removals(ctx, pins); + }).safe_then([ctx, size, &to_write] { + return do_insertions(ctx, to_write); + }).safe_then([size, &object_data] { + if (size == 0) { + object_data.clear(); + } + return ObjectDataHandler::clear_ertr::now(); + }); + }); +} + +/** + * get_buffers + * + * Returns extent_to_write_t's from bl. + * + * TODO: probably add some kind of upper limit on extent size. 
+ */
+extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
+{
+  auto ret = extent_to_write_list_t();
+  ret.emplace_back(offset, bl);  // currently a single data extent covering all of bl
+  return ret;
+};
+
+ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
+  context_t ctx,
+  laddr_t _offset,
+  bufferlist &&bl,
+  lba_pin_list_t &&_pins)
+{
+  return seastar::do_with(  // keeps offset/bl/pins/to_write alive across the continuation chain
+    _offset,
+    std::move(bl),
+    std::move(_pins),
+    extent_to_write_list_t(),
+    [this, ctx](laddr_t &offset, auto &bl, auto &pins, auto &to_write) {
+      ceph_assert(pins.size() >= 1);
+      auto pin_begin = pins.front()->get_laddr();
+      ceph_assert(pin_begin <= offset);
+      auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
+      ceph_assert(pin_end >= (offset + bl.length()));  // pins must reach the end of the write; was `offset > bl.length()`, a bool
+
+      return split_pin_left(
+        ctx,
+        pins.front(),
+        offset
+      ).safe_then([this, ctx, pin_begin, &offset, &bl, &pins, &to_write](
+                    auto p) {
+        auto &[left_extent, headptr] = p;
+        if (left_extent) {  // zero extent left of the write that must be re-created
+          ceph_assert(left_extent->addr == pin_begin);
+          to_write.push_front(std::move(*left_extent));
+        }
+        if (headptr) {  // prepend the bytes between the pin/block start and offset
+          bufferlist newbl;
+          newbl.append(*headptr);
+          newbl.append(bl);
+          bl.swap(newbl);
+          offset -= headptr->length();
+          assert_aligned(offset);
+        }
+        return split_pin_right(
+          ctx,
+          pins.back(),
+          offset + bl.length());
+      }).safe_then([this, ctx, pin_end, &offset, &bl, &pins, &to_write](
+                     auto p) {
+        auto &[right_extent, tailptr] = p;
+        if (tailptr) {  // append the bytes between the write end and the pin/block end
+          bl.append(*tailptr);
+          assert_aligned(bl.length());
+        }
+        to_write.splice(to_write.end(), get_buffers(offset, bl));
+        if (right_extent) {  // zero extent right of the write that must be re-created
+          ceph_assert((right_extent->addr + right_extent->len) == pin_end);
+          to_write.push_back(std::move(*right_extent));
+        }
+        return write_ertr::now();
+      }).safe_then([this, ctx, &pins] {
+        return do_removals(ctx, pins);  // drop the old mappings first ...
+      }).safe_then([this, ctx, &to_write] {
+        return do_insertions(ctx, to_write);  // ... then create the replacements
+      });
+    });
+}
+
+ObjectDataHandler::write_ret ObjectDataHandler::write(
+  context_t ctx,
+  objaddr_t offset,
+  const bufferlist &bl)
+{
+  return with_object_data(
+    ctx,
+
[this, ctx, offset, &bl](auto &object_data) { + return prepare_data_reservation( + ctx, + object_data, + p2roundup(offset + bl.length(), ctx.tm.get_block_size()) + ).safe_then([this, ctx, offset, &object_data, &bl] { + auto logical_offset = object_data.get_reserved_data_base() + offset; + return ctx.tm.get_pins( + ctx.t, + logical_offset, + bl.length() + ).safe_then([this, ctx, offset, logical_offset, &object_data, &bl]( + auto pins) { + return overwrite(ctx, logical_offset, bufferlist(bl), std::move(pins)); + }); + }); + }); +} + +ObjectDataHandler::read_ret ObjectDataHandler::read( + context_t ctx, + objaddr_t obj_offset, + extent_len_t len) +{ + return seastar::do_with( + bufferlist(), + [this, ctx, obj_offset, len](auto &ret) { + return with_object_data( + ctx, + [this, ctx, obj_offset, len, &ret](const auto &object_data) { + /* Assumption: callers ensure that onode size is <= reserved + * size and that len is adjusted here prior to call */ + ceph_assert(!object_data.is_null()); + ceph_assert((obj_offset + len) <= object_data.get_reserved_data_len()); + ceph_assert(len > 0); + laddr_t loffset = + object_data.get_reserved_data_base() + obj_offset; + return ctx.tm.get_pins( + ctx.t, + loffset, + len + ).safe_then([this, ctx, loffset, len, &ret](auto _pins) { + // offset~len falls within reserved region and len > 0 + ceph_assert(_pins.size() >= 1); + ceph_assert((*_pins.begin())->get_laddr() <= loffset); + return seastar::do_with( + std::move(_pins), + loffset, + [this, ctx, loffset, len, &ret](auto &pins, auto ¤t) { + return crimson::do_for_each( + std::begin(pins), + std::end(pins), + [this, ctx, loffset, len, ¤t, &ret](auto &pin) + -> read_ertr::future<> { + ceph_assert(current <= (loffset + len)); + ceph_assert( + (loffset + len) > pin->get_laddr()); + laddr_t end = std::min( + pin->get_laddr() + pin->get_length(), + loffset + len); + if (pin->get_paddr().is_zero()) { + ceph_assert(end > current); // See LBAManager::get_mappings + ret.append_zero(end - 
current); + current = end; + return seastar::now(); + } else { + return ctx.tm.pin_to_extent( + ctx.t, + std::move(pin) + ).safe_then([&ret, ¤t, end](auto extent) { + ceph_assert( + (extent->get_laddr() + extent->get_length()) >= end); + ceph_assert(end > current); + ret.append( + bufferptr( + extent->get_bptr(), + current - extent->get_laddr(), + end - current)); + current = end; + return seastar::now(); + }).handle_error( + read_ertr::pass_further{}, + crimson::ct_error::assert_all{ + "ObjectDataHandler::read hit invalid error" + } + ); + } + }); + }); + }); + }).safe_then([&ret] { + return std::move(ret); + }); + }); +} + +ObjectDataHandler::truncate_ret ObjectDataHandler::truncate( + context_t ctx, + objaddr_t offset) +{ + return with_object_data( + ctx, + [this, ctx, offset](auto &object_data) { + if (offset < object_data.get_reserved_data_len()) { + return trim_data_reservation(ctx, object_data, offset); + } else if (offset > object_data.get_reserved_data_len()) { + return prepare_data_reservation( + ctx, + object_data, + offset); + } else { + return truncate_ertr::now(); + } + }); +} + +ObjectDataHandler::clear_ret ObjectDataHandler::clear( + context_t ctx) +{ + return with_object_data( + ctx, + [this, ctx](auto &object_data) { + return trim_data_reservation(ctx, object_data, 0); + }); +} + +} diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h new file mode 100644 index 00000000000..c7dc078fc4b --- /dev/null +++ b/src/crimson/os/seastore/object_data_handler.h @@ -0,0 +1,109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "include/buffer.h" + +#include "test/crimson/seastore/test_block.h" // TODO + +#include "crimson/os/seastore/onode.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/transaction.h" + +namespace crimson::os::seastore { + +struct ObjectDataBlock : 
crimson::os::seastore::LogicalCachedExtent { + using Ref = TCachedExtentRef; + + ObjectDataBlock(ceph::bufferptr &&ptr) + : LogicalCachedExtent(std::move(ptr)) {} + ObjectDataBlock(const ObjectDataBlock &other) + : LogicalCachedExtent(other) {} + + CachedExtentRef duplicate_for_write() final { + return CachedExtentRef(new ObjectDataBlock(*this)); + }; + + static constexpr extent_types_t TYPE = extent_types_t::OBJECT_DATA_BLOCK; + extent_types_t get_type() const final { + return TYPE; + } + + ceph::bufferlist get_delta() final { + /* Currently, we always allocate fresh ObjectDataBlock's rather than + * mutating existing ones. */ + ceph_assert(0 == "Should be impossible"); + } + + void apply_delta(const ceph::bufferlist &bl) final { + // See get_delta() + ceph_assert(0 == "Should be impossible"); + } +}; +using ObjectDataBlockRef = TCachedExtentRef; + +class ObjectDataHandler { +public: + using base_ertr = TransactionManager::base_ertr; + + struct context_t { + TransactionManager &tm; + Transaction &t; + Onode &onode; + }; + + /// Writes bl to [offset, offset + bl.length()) + using write_ertr = base_ertr; + using write_ret = write_ertr::future<>; + write_ret write( + context_t ctx, + objaddr_t offset, + const bufferlist &bl); + + /// Reads data in [offset, offset + len) + using read_ertr = base_ertr; + using read_ret = read_ertr::future; + read_ret read( + context_t ctx, + objaddr_t offset, + extent_len_t len); + + /// Clears data past offset + using truncate_ertr = base_ertr; + using truncate_ret = truncate_ertr::future<>; + truncate_ret truncate( + context_t ctx, + objaddr_t offset); + + /// Clears data and reservation + using clear_ertr = base_ertr; + using clear_ret = clear_ertr::future<>; + clear_ret clear(context_t ctx); + +private: + /// Updates region [_offset, _offset + bl.length) to bl + write_ret overwrite( + context_t ctx, ///< [in] ctx + laddr_t offset, ///< [in] write offset + bufferlist &&bl, ///< [in] buffer to write + lba_pin_list_t &&pins ///< [in] 
set of pins overlapping above region + ); + + /// Ensures object_data reserved region is prepared + write_ret prepare_data_reservation( + context_t ctx, + object_data_t &object_data, + extent_len_t size); + + /// Trims data past size + clear_ret trim_data_reservation( + context_t ctx, + object_data_t &object_data, + extent_len_t size); +}; + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc index 59d8050a212..c325dc4845f 100644 --- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc @@ -603,25 +603,21 @@ omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth) { ceph_assert(depth > 0); if (depth > 1) { - return oc.tm.read_extents(oc.t, laddr, OMAP_BLOCK_SIZE + return oc.tm.read_extent(oc.t, laddr, OMAP_BLOCK_SIZE ).handle_error( omap_load_extent_ertr::pass_further{}, crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } ).safe_then( - [](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); + [](auto&& e) { return seastar::make_ready_future(std::move(e)); }); } else { - return oc.tm.read_extents(oc.t, laddr, OMAP_BLOCK_SIZE + return oc.tm.read_extent(oc.t, laddr, OMAP_BLOCK_SIZE ).handle_error( omap_load_extent_ertr::pass_further{}, crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } ).safe_then( - [](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); + [](auto&& e) { return seastar::make_ready_future(std::move(e)); }); } diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index 3b9dd383a90..21c0fba8aa5 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -16,6 +16,8 @@ namespace crimson::os::seastore { struct onode_layout_t { ceph_le32 size{0}; omap_root_le_t omap_root; + + 
object_data_le_t object_data; } __attribute__((packed)); class Transaction; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h index f80b99fabe0..4b584372d1d 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -75,10 +75,8 @@ class SeastoreNodeExtentManager final: public NodeExtentManager { tm_future read_extent( Transaction& t, laddr_t addr, extent_len_t len) override { logger().debug("OTree::Seastore: reading {}B at {:#x} ...", len, addr); - return tm.read_extents(t, addr, len - ).safe_then([addr, len](auto&& extents) { - assert(extents.size() == 1); - [[maybe_unused]] auto [laddr, e] = extents.front(); + return tm.read_extent(t, addr, len + ).safe_then([addr, len](auto&& e) { logger().trace("OTree::Seastore: read {}B at {:#x}", e->get_length(), e->get_laddr()); assert(e->get_laddr() == addr); diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index a38462b7cd7..211eb5a1f86 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -19,6 +19,7 @@ #include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h" #include "crimson/os/seastore/segment_manager/ephemeral.h" #include "crimson/os/seastore/onode_manager.h" +#include "crimson/os/seastore/object_data_handler.h" namespace { seastar::logger& logger() { @@ -169,7 +170,19 @@ SeaStore::read_errorator::future SeaStore::read( size_t len, uint32_t op_flags) { - return read_errorator::make_ready_future(); + return repeat_with_onode( + ch, + oid, + [=](auto &t, auto &onode) { + return ObjectDataHandler().read( + ObjectDataHandler::context_t{ + *transaction_manager, + t, + onode, + }, + offset, + len); + }); } SeaStore::read_errorator::future SeaStore::readv( @@ -213,10 +226,10 @@ 
seastar::future SeaStore::stat( struct stat st; auto &olayout = onode.get_layout(); st.st_size = olayout.size; - st.st_blksize = 4096; + st.st_blksize = transaction_manager->get_block_size(); st.st_blocks = (st.st_size + st.st_blksize - 1) / st.st_blksize; st.st_nlink = 1; - return seastar::make_ready_future(); + return seastar::make_ready_future(st); }).handle_error( crimson::ct_error::assert_all{ "Invalid error in SeaStore::stat" @@ -529,7 +542,9 @@ SeaStore::tm_ret SeaStore::_do_transaction_step( uint32_t fadvise_flags = i.get_fadvise_flags(); ceph::bufferlist bl; i.decode_bl(bl); - return _write(ctx, get_onode(op->oid), off, len, bl, fadvise_flags); + return _write( + ctx, get_onode(op->oid), off, len, std::move(bl), + fadvise_flags); } break; case Transaction::OP_TRUNCATE: @@ -545,7 +560,7 @@ SeaStore::tm_ret SeaStore::_do_transaction_step( i.decode_bl(bl); std::map to_set; to_set[name] = bufferptr(bl.c_str(), bl.length()); - return _setattrs(ctx, get_onode(op->oid), to_set); + return _setattrs(ctx, get_onode(op->oid), std::move(to_set)); } break; case Transaction::OP_MKCOLL: @@ -571,14 +586,14 @@ SeaStore::tm_ret SeaStore::_do_transaction_step( { ceph::bufferlist bl; i.decode_bl(bl); - return _omap_set_header(ctx, get_onode(op->oid), bl); + return _omap_set_header(ctx, get_onode(op->oid), std::move(bl)); } break; case Transaction::OP_OMAP_RMKEYS: { omap_keys_t keys; i.decode_keyset(keys); - return _omap_rmkeys(ctx, get_onode(op->oid), keys); + return _omap_rmkeys(ctx, get_onode(op->oid), std::move(keys)); } break; case Transaction::OP_OMAP_RMKEYRANGE: @@ -586,7 +601,9 @@ SeaStore::tm_ret SeaStore::_do_transaction_step( string first, last; first = i.decode_string(); last = i.decode_string(); - return _omap_rmkeyrange(ctx, get_onode(op->oid), first, last); + return _omap_rmkeyrange( + ctx, get_onode(op->oid), + std::move(first), std::move(last)); } break; case Transaction::OP_COLL_HINT: @@ -626,13 +643,30 @@ SeaStore::tm_ret SeaStore::_touch( SeaStore::tm_ret 
SeaStore::_write( internal_context_t &ctx, OnodeRef &onode, - uint64_t offset, size_t len, const ceph::bufferlist& bl, + uint64_t offset, size_t len, + ceph::bufferlist &&_bl, uint32_t fadvise_flags) { - logger().debug("{}: {} {} ~ {}", + logger().debug("SeaStore::{}: {} {} ~ {}", __func__, *onode, offset, len); - assert(len == bl.length()); - return tm_ertr::now(); + { + auto &object_size = onode->get_mutable_layout(*ctx.transaction).size; + object_size = std::max( + offset + len, + object_size); + } + return seastar::do_with( + std::move(_bl), + [=, &ctx, &onode](auto &bl) { + return ObjectDataHandler().write( + ObjectDataHandler::context_t{ + *transaction_manager, + *ctx.transaction, + *onode, + }, + offset, + bl); + }); } SeaStore::tm_ret SeaStore::_omap_set_values( @@ -685,7 +719,7 @@ SeaStore::tm_ret SeaStore::_omap_set_values( SeaStore::tm_ret SeaStore::_omap_set_header( internal_context_t &ctx, OnodeRef &onode, - const ceph::bufferlist &header) + ceph::bufferlist &&header) { logger().debug( "{}: {} {} bytes", @@ -697,7 +731,7 @@ SeaStore::tm_ret SeaStore::_omap_set_header( SeaStore::tm_ret SeaStore::_omap_rmkeys( internal_context_t &ctx, OnodeRef &onode, - const omap_keys_t& keys) + omap_keys_t &&keys) { logger().debug( "{} {} {} keys", @@ -709,9 +743,11 @@ SeaStore::tm_ret SeaStore::_omap_rmkeys( return seastar::do_with( BtreeOMapManager(*transaction_manager), onode->get_layout().omap_root.get(), - [&ctx, &onode, &keys, this]( + std::move(keys), + [&ctx, &onode, this]( auto &omap_manager, - auto &omap_root) { + auto &omap_root, + auto &keys) { return crimson::do_for_each( keys.begin(), keys.end(), @@ -733,8 +769,8 @@ SeaStore::tm_ret SeaStore::_omap_rmkeys( SeaStore::tm_ret SeaStore::_omap_rmkeyrange( internal_context_t &ctx, OnodeRef &onode, - const std::string &first, - const std::string &last) + std::string first, + std::string last) { logger().debug( "{} {} first={} last={}", @@ -748,15 +784,22 @@ SeaStore::tm_ret SeaStore::_truncate( OnodeRef &onode, 
uint64_t size) { - logger().debug("{} onode={} size={}", + logger().debug("SeaStore::{} onode={} size={}", __func__, *onode, size); - return tm_ertr::now(); + onode->get_mutable_layout(*ctx.transaction).size = size; + return ObjectDataHandler().truncate( + ObjectDataHandler::context_t{ + *transaction_manager, + *ctx.transaction, + *onode + }, + size); } SeaStore::tm_ret SeaStore::_setattrs( internal_context_t &ctx, OnodeRef &onode, - std::map& aset) + std::map &&aset) { logger().debug("{} onode={}", __func__, *onode); diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index 120be167d9f..8c71b96b82f 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -228,7 +228,8 @@ private: tm_ret _write( internal_context_t &ctx, OnodeRef &onode, - uint64_t offset, size_t len, const ceph::bufferlist& bl, + uint64_t offset, size_t len, + ceph::bufferlist &&bl, uint32_t fadvise_flags); tm_ret _omap_set_values( internal_context_t &ctx, @@ -237,23 +238,23 @@ private: tm_ret _omap_set_header( internal_context_t &ctx, OnodeRef &onode, - const ceph::bufferlist &header); + ceph::bufferlist &&header); tm_ret _omap_rmkeys( internal_context_t &ctx, OnodeRef &onode, - const omap_keys_t& aset); + omap_keys_t &&aset); tm_ret _omap_rmkeyrange( internal_context_t &ctx, OnodeRef &onode, - const std::string &first, - const std::string &last); + std::string first, + std::string last); tm_ret _truncate( internal_context_t &ctx, OnodeRef &onode, uint64_t size); tm_ret _setattrs( internal_context_t &ctx, OnodeRef &onode, - std::map& aset); + std::map &&aset); tm_ret _create_collection( internal_context_t &ctx, const coll_t& cid, int bits); diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index 0ece4f4a370..a651d23cd90 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -13,6 +13,8 @@ std::ostream &segment_to_stream(std::ostream 
&out, const segment_id_t &t) return out << "BLOCK_REL_SEG"; else if (t == RECORD_REL_SEG_ID) return out << "RECORD_REL_SEG"; + else if (t == ZERO_SEG_ID) + return out << "ZERO_SEG"; else if (t == FAKE_SEG_ID) return out << "FAKE_SEG"; else @@ -53,10 +55,6 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "LADDR_INTERNAL"; case extent_types_t::LADDR_LEAF: return out << "LADDR_LEAF"; - case extent_types_t::EXTMAP_INNER: - return out << "EXTMAP_INNER"; - case extent_types_t::EXTMAP_LEAF: - return out << "EXTMAP_LEAF"; case extent_types_t::ONODE_BLOCK_STAGED: return out << "ONODE_BLOCK_STAGED"; case extent_types_t::OMAP_INNER: @@ -65,6 +63,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "OMAP_LEAF"; case extent_types_t::COLL_BLOCK: return out << "COLL_BLOCK"; + case extent_types_t::OBJECT_DATA_BLOCK: + return out << "OBJECT_DATA_BLOCK"; case extent_types_t::TEST_BLOCK: return out << "TEST_BLOCK"; case extent_types_t::TEST_BLOCK_PHYSICAL: diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 47d828d8ac7..de6485ba4b0 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -43,11 +43,16 @@ constexpr segment_id_t RECORD_REL_SEG_ID = std::numeric_limits::max() - 2; constexpr segment_id_t BLOCK_REL_SEG_ID = std::numeric_limits::max() - 3; - // for tests which generate fake paddrs constexpr segment_id_t FAKE_SEG_ID = std::numeric_limits::max() - 4; +/* Used to denote references to notional zero filled segment, mainly + * in denoting reserved laddr ranges for unallocated object data. 
+ */ +constexpr segment_id_t ZERO_SEG_ID = + std::numeric_limits::max() - 5; + std::ostream &segment_to_stream(std::ostream &, const segment_id_t &t); // Offset within a segment on disk, see SegmentManager @@ -103,6 +108,27 @@ struct paddr_t { return segment == BLOCK_REL_SEG_ID; } + /// Denotes special zero segment addr + bool is_zero() const { + return segment == ZERO_SEG_ID; + } + + /// Denotes special null segment addr + bool is_null() const { + return segment == NULL_SEG_ID; + } + + /** + * is_real + * + * indicates whether addr reflects a physical location, absolute + * or relative. FAKE segments also count as real so as to reflect + * the way in which unit tests use them. + */ + bool is_real() const { + return !is_zero() && !is_null(); + } + paddr_t add_offset(segment_off_t o) const { return paddr_t{segment, offset + o}; } @@ -175,6 +201,9 @@ constexpr paddr_t make_block_relative_paddr(segment_off_t off) { constexpr paddr_t make_fake_paddr(segment_off_t off) { return paddr_t{FAKE_SEG_ID, off}; } +constexpr paddr_t zero_paddr() { + return paddr_t{ZERO_SEG_ID, 0}; +} struct __attribute((packed)) paddr_le_t { ceph_le32 segment = ceph_le32(NULL_SEG_ID); @@ -195,7 +224,8 @@ struct __attribute((packed)) paddr_le_t { std::ostream &operator<<(std::ostream &out, const paddr_t &rhs); using objaddr_t = uint32_t; -constexpr objaddr_t OBJ_ADDR_MIN = std::numeric_limits::min(); +constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits::max(); +constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX - 1; /* Monotonically increasing identifier for the location of a * journal_record. 
@@ -282,12 +312,11 @@ enum class extent_types_t : uint8_t { ROOT = 0, LADDR_INTERNAL = 1, LADDR_LEAF = 2, - EXTMAP_INNER = 4, - EXTMAP_LEAF = 5, - OMAP_INNER = 6, - OMAP_LEAF = 7, - ONODE_BLOCK_STAGED = 8, - COLL_BLOCK = 9, + OMAP_INNER = 4, + OMAP_LEAF = 5, + ONODE_BLOCK_STAGED = 6, + COLL_BLOCK = 7, + OBJECT_DATA_BLOCK = 8, // Test Block Types TEST_BLOCK = 0xF0, @@ -367,6 +396,71 @@ struct record_t { std::vector deltas; }; +class object_data_t { + laddr_t reserved_data_base = L_ADDR_NULL; + extent_len_t reserved_data_len = 0; + + bool dirty = false; +public: + object_data_t( + laddr_t reserved_data_base, + extent_len_t reserved_data_len) + : reserved_data_base(reserved_data_base), + reserved_data_len(reserved_data_len) {} + + laddr_t get_reserved_data_base() const { + return reserved_data_base; + } + + extent_len_t get_reserved_data_len() const { + return reserved_data_len; + } + + bool is_null() const { + return reserved_data_base == L_ADDR_NULL; + } + + bool must_update() const { + return dirty; + } + + void update_reserved( + laddr_t base, + extent_len_t len) { + dirty = true; + reserved_data_base = base; + reserved_data_len = len; + } + + void update_len( + extent_len_t len) { + dirty = true; + reserved_data_len = len; + } + + void clear() { + dirty = true; + reserved_data_base = L_ADDR_NULL; + reserved_data_len = 0; + } +}; + +struct __attribute__((packed)) object_data_le_t { + laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL); + extent_len_le_t reserved_data_len = init_extent_len_le(0); + + void update(const object_data_t &nroot) { + reserved_data_base = nroot.get_reserved_data_base(); + reserved_data_len = init_extent_len_le(nroot.get_reserved_data_len()); + } + + object_data_t get() const { + return object_data_t( + reserved_data_base, + reserved_data_len); + } +}; + struct omap_root_t { laddr_t addr = L_ADDR_NULL; depth_t depth = 0; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 
f9689254b91..3b7d74733a7 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -156,7 +156,7 @@ TransactionManager::ref_ret TransactionManager::dec_ref( { return lba_manager->decref_extent(t, offset ).safe_then([this, offset, &t](auto result) -> ref_ret { - if (result.refcount == 0) { + if (result.refcount == 0 && !result.addr.is_zero()) { logger().debug( "TransactionManager::dec_ref: offset {} refcount 0", offset); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 93a24c8109f..2fa84cb7462 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -96,64 +96,91 @@ public: } /** - * Read extents corresponding to specified lba range + * get_pins + * + * Get logical pins overlapping offset~length */ - using read_extent_ertr = LBAManager::get_mapping_ertr::extend_ertr< - SegmentManager::read_ertr>; - template - using read_extent_ret = read_extent_ertr::future>; - template - read_extent_ret read_extents( + using get_pins_ertr = LBAManager::get_mapping_ertr; + using get_pins_ret = get_pins_ertr::future; + get_pins_ret get_pins( Transaction &t, laddr_t offset, - extent_len_t length) - { - std::unique_ptr> ret = - std::make_unique>(); - auto &ret_ref = *ret; - std::unique_ptr pin_list = - std::make_unique(); - auto &pin_list_ref = *pin_list; + extent_len_t length) { return lba_manager->get_mapping( - t, offset, length - ).safe_then([this, &t, &pin_list_ref, &ret_ref](auto pins) { + t, offset, length); + } + + /** + * pin_to_extent + * + * Get extent mapped at pin. 
+ */ + using pin_to_extent_ertr = get_pins_ertr::extend_ertr< + SegmentManager::read_ertr>; + template + using pin_to_extent_ret = pin_to_extent_ertr::future< + TCachedExtentRef>; + template + pin_to_extent_ret pin_to_extent( + Transaction &t, + LBAPinRef pin) { + using ret = pin_to_extent_ret; + crimson::get_logger(ceph_subsys_filestore).debug( + "pin_to_extent: getting extent {}", + *pin); + return cache->get_extent( + t, + pin->get_paddr(), + pin->get_length() + ).safe_then([this, pin=std::move(pin)](auto ref) mutable -> ret { + if (!ref->has_pin()) { + if (pin->has_been_invalidated() || ref->has_been_invalidated()) { + return crimson::ct_error::eagain::make(); + } else { + ref->set_pin(std::move(pin)); + lba_manager->add_pin(ref->get_pin()); + } + } crimson::get_logger(ceph_subsys_filestore).debug( - "read_extents: mappings {}", - pins); - pins.swap(pin_list_ref); - return crimson::do_for_each( - pin_list_ref.begin(), - pin_list_ref.end(), - [this, &t, &ret_ref](auto &pin) { - crimson::get_logger(ceph_subsys_filestore).debug( - "read_extents: get_extent {}~{}", - pin->get_paddr(), - pin->get_length()); - return cache->get_extent( - t, - pin->get_paddr(), - pin->get_length() - ).safe_then([this, &pin, &ret_ref](auto ref) mutable - -> read_extent_ertr::future<> { - if (!ref->has_pin()) { - if (pin->has_been_invalidated() || ref->has_been_invalidated()) { - return crimson::ct_error::eagain::make(); - } else { - ref->set_pin(std::move(pin)); - lba_manager->add_pin(ref->get_pin()); - } - } - ret_ref.push_back(std::make_pair(ref->get_laddr(), ref)); - crimson::get_logger(ceph_subsys_filestore).debug( - "read_extents: got extent {}", - *ref); - return read_extent_ertr::now(); - }); - }); - }).safe_then([ret=std::move(ret), pin_list=std::move(pin_list)]() mutable { - return read_extent_ret( - read_extent_ertr::ready_future_marker{}, - std::move(*ret)); + "pin_to_extent: got extent {}", + *ref); + return pin_to_extent_ret( + pin_to_extent_ertr::ready_future_marker{}, + 
std::move(ref)); + }); + } + + /** + * read_extent + * + * Read extent of type T at offset~length + */ + using read_extent_ertr = get_pins_ertr::extend_ertr< + SegmentManager::read_ertr>; + template + using read_extent_ret = read_extent_ertr::future< + TCachedExtentRef>; + template + read_extent_ret read_extent( + Transaction &t, + laddr_t offset, + extent_len_t length) { + return get_pins( + t, offset, length + ).safe_then([this, &t, offset, length](auto pins) { + if (pins.size() != 1 || !pins.front()->get_paddr().is_real()) { + auto &logger = crimson::get_logger(ceph_subsys_filestore); + logger.error( + "TransactionManager::read_extent offset {} len {} got {} extents:", + offset, + length, + pins.size()); + for (auto &i: pins) { + logger.error("\t{}", *i); + } + ceph_assert(0 == "Should be impossible"); + } + return pin_to_extent(t, std::move(pins.front())); }); } @@ -240,6 +267,31 @@ public: }); } + using reserve_extent_ertr = alloc_extent_ertr; + using reserve_extent_ret = reserve_extent_ertr::future; + reserve_extent_ret reserve_region( + Transaction &t, + laddr_t hint, + extent_len_t len) { + return lba_manager->alloc_extent( + t, + hint, + len, + zero_paddr()); + } + + using find_hole_ertr = LBAManager::find_hole_ertr; + using find_hole_ret = LBAManager::find_hole_ret; + find_hole_ret find_hole( + Transaction &t, + laddr_t hint, + extent_len_t len) { + return lba_manager->find_hole( + t, + hint, + len); + } + /* alloc_extents * * allocates more than one new blocks of type T. 
@@ -372,6 +424,10 @@ public: croot->get_root().collection_root.update(cmroot); } + extent_len_t get_block_size() const { + return segment_manager.get_block_size(); + } + ~TransactionManager(); private: diff --git a/src/crimson/tools/store-nbd.cc b/src/crimson/tools/store-nbd.cc index 3aa3d7968b9..38c4a49c495 100644 --- a/src/crimson/tools/store-nbd.cc +++ b/src/crimson/tools/store-nbd.cc @@ -556,6 +556,44 @@ public: ); } + auto read_extents( + Transaction &t, + laddr_t offset, + extent_len_t length) { + return seastar::do_with( + lba_pin_list_t(), + lextent_list_t(), + [this, &t, offset, length](auto &pins, auto &ret) { + return tm->get_pins( + t, offset, length + ).safe_then([this, &t, &pins, &ret](auto _pins) { + _pins.swap(pins); + logger().debug("read_extents: mappings {}", pins); + return crimson::do_for_each( + pins.begin(), + pins.end(), + [this, &t, &ret](auto &&pin) { + logger().debug( + "read_extents: get_extent {}~{}", + pin->get_paddr(), + pin->get_length()); + return tm->pin_to_extent( + t, + std::move(pin) + ).safe_then([this, &ret](auto ref) mutable { + ret.push_back(std::make_pair(ref->get_laddr(), ref)); + logger().debug( + "read_extents: got extent {}", + *ref); + return seastar::now(); + }); + }).safe_then([&ret] { + return std::move(ret); + }); + }); + }); + } + seastar::future read( off_t offset, size_t size) final { @@ -568,7 +606,7 @@ public: return seastar::do_with( tm->create_transaction(), [=, &blret](auto &t) { - return tm->read_extents(*t, offset, size + return read_extents(*t, offset, size ).safe_then([=, &blret](auto ext_list) mutable { size_t cur = offset; for (auto &i: ext_list) { diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt index 7b58c8a14b1..f1585bf0ec0 100644 --- a/src/test/crimson/seastore/CMakeLists.txt +++ b/src/test/crimson/seastore/CMakeLists.txt @@ -38,13 +38,13 @@ target_link_libraries( crimson::gtest crimson-seastore) -add_executable(unittest-extmap-manager - 
test_extmap_manager.cc +add_executable(unittest-object-data-handler + test_object_data_handler.cc ../gtest_seastar.cc) -add_ceph_test(unittest-extmap-manager - unittest-extmap-manager --memory 256M --smp 1) +add_ceph_unittest(unittest-object-data-handler + --memory 256M --smp 1) target_link_libraries( - unittest-extmap-manager + unittest-object-data-handler crimson::gtest crimson-seastore crimson-os diff --git a/src/test/crimson/seastore/test_extmap_manager.cc b/src/test/crimson/seastore/test_extmap_manager.cc deleted file mode 100644 index c68a6dbc467..00000000000 --- a/src/test/crimson/seastore/test_extmap_manager.cc +++ /dev/null @@ -1,286 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "test/crimson/gtest_seastar.h" -#include "test/crimson/seastore/transaction_manager_test_state.h" - -#include "crimson/os/seastore/cache.h" -#include "crimson/os/seastore/transaction_manager.h" -#include "crimson/os/seastore/segment_manager.h" -#include "crimson/os/seastore/extentmap_manager.h" - -#include "test/crimson/seastore/test_block.h" - -using namespace crimson; -using namespace crimson::os; -using namespace crimson::os::seastore; - -namespace { - [[maybe_unused]] seastar::logger& logger() { - return crimson::get_logger(ceph_subsys_test); - } -} - - -struct extentmap_manager_test_t : - public seastar_test_suite_t, - TMTestState { - - ExtentMapManagerRef extmap_manager; - - extentmap_manager_test_t() {} - - seastar::future<> set_up_fut() final { - return tm_setup().then([this] { - extmap_manager = extentmap_manager::create_extentmap_manager(*tm); - return seastar::now(); - }); - } - - seastar::future<> tear_down_fut() final { - return tm_teardown().then([this] { - extmap_manager.reset(); - return seastar::now(); - }); - } - - using test_extmap_t = std::map; - test_extmap_t test_ext_mappings; - - extent_mapping_t insert_extent( - extmap_root_t &extmap_root, - Transaction &t, - uint32_t lo, - 
lext_map_val_t val) { - auto extent = extmap_manager->add_lextent(extmap_root, t, lo, val).unsafe_get0(); - EXPECT_EQ(lo, extent.logical_offset); - EXPECT_EQ(val.laddr, extent.laddr); - EXPECT_EQ(val.length, extent.length); - test_ext_mappings.emplace(extent.logical_offset, - lext_map_val_t{extent.laddr, extent.length}); - return extent; - } - - extent_map_list_t find_extent( - extmap_root_t &extmap_root, - Transaction &t, - uint32_t lo, - uint32_t len) { - auto extent = extmap_manager->find_lextent(extmap_root, t, lo, len).unsafe_get0(); - EXPECT_EQ(lo, extent.front().logical_offset); - EXPECT_EQ(len, extent.front().length); - return extent; - } - - extent_map_list_t findno_extent( - extmap_root_t &extmap_root, - Transaction &t, - uint32_t lo, - uint32_t len) { - auto extent = extmap_manager->find_lextent(extmap_root, t, lo, len).unsafe_get0(); - EXPECT_EQ(extent.empty(), true); - return extent; - } - - void rm_extent( - extmap_root_t &extmap_root, - Transaction &t, - uint32_t lo, - lext_map_val_t val ) { - auto ret = extmap_manager->rm_lextent(extmap_root, t, lo, val).unsafe_get0(); - EXPECT_TRUE(ret); - test_ext_mappings.erase(lo); - } - - void check_mappings(extmap_root_t &extmap_root, Transaction &t) { - for (const auto& [lo, ext]: test_ext_mappings){ - const auto ext_list = find_extent(extmap_root, t, lo, ext.length); - ASSERT_EQ(ext_list.size(), 1); - const auto& ext_map = ext_list.front(); - EXPECT_EQ(ext.laddr, ext_map.laddr); - EXPECT_EQ(ext.length, ext_map.length); - } - } - - void check_mappings(extmap_root_t &extmap_root) { - auto t = tm->create_transaction(); - check_mappings(extmap_root, *t); - } - - void replay() { - logger().debug("{}: begin", __func__); - restart(); - extmap_manager = extentmap_manager::create_extentmap_manager(*tm); - logger().debug("{}: end", __func__); - } - - void submit_transaction(TransactionRef &&t) { - tm->submit_transaction(std::move(t)).unsafe_get0(); - segment_cleaner->run_until_halt().get0(); - } -}; - 
-TEST_F(extentmap_manager_test_t, basic) -{ - run_async([this] { - extmap_root_t extmap_root(0, L_ADDR_NULL); - { - auto t = tm->create_transaction(); - extmap_root = extmap_manager->initialize_extmap(*t).unsafe_get0(); - submit_transaction(std::move(t)); - } - - uint32_t len = 4096; - uint32_t lo = 0x1 * len; - { - auto t = tm->create_transaction(); - logger().debug("first transaction"); - [[maybe_unused]] auto addref = insert_extent(extmap_root, *t, lo, {lo, len}); - [[maybe_unused]] auto seekref = find_extent(extmap_root, *t, lo, len); - submit_transaction(std::move(t)); - } - { - auto t = tm->create_transaction(); - logger().debug("second transaction"); - auto seekref = find_extent(extmap_root, *t, lo, len); - rm_extent(extmap_root, *t, lo, {seekref.front().laddr, len}); - [[maybe_unused]] auto seekref2 = findno_extent(extmap_root, *t, lo, len); - submit_transaction(std::move(t)); - } - { - auto t = tm->create_transaction(); - logger().debug("third transaction"); - [[maybe_unused]] auto seekref = findno_extent(extmap_root, *t, lo, len); - submit_transaction(std::move(t)); - } - }); -} - -TEST_F(extentmap_manager_test_t, force_leafnode_split) -{ - run_async([this] { - extmap_root_t extmap_root(0, L_ADDR_NULL); - { - auto t = tm->create_transaction(); - extmap_root = extmap_manager->initialize_extmap(*t).unsafe_get0(); - submit_transaction(std::move(t)); - } - uint32_t len = 4096; - uint32_t lo = 0; - for (unsigned i = 0; i < 40; i++) { - auto t = tm->create_transaction(); - logger().debug("opened transaction"); - for (unsigned j = 0; j < 10; ++j) { - [[maybe_unused]] auto addref = insert_extent(extmap_root, *t, lo, {lo, len}); - lo += len; - if ((i % 20 == 0) && (j == 5)) { - check_mappings(extmap_root, *t); - } - } - logger().debug("force split submit transaction i = {}", i); - submit_transaction(std::move(t)); - check_mappings(extmap_root); - } - }); - -} - -TEST_F(extentmap_manager_test_t, force_leafnode_split_merge) -{ - run_async([this] { - extmap_root_t 
extmap_root(0, L_ADDR_NULL); - { - auto t = tm->create_transaction(); - extmap_root = extmap_manager->initialize_extmap(*t).unsafe_get0(); - submit_transaction(std::move(t)); - } - uint32_t len = 4096; - uint32_t lo = 0; - for (unsigned i = 0; i < 80; i++) { - auto t = tm->create_transaction(); - logger().debug("opened split_merge transaction"); - for (unsigned j = 0; j < 5; ++j) { - [[maybe_unused]] auto addref = insert_extent(extmap_root, *t, lo, {lo, len}); - lo += len; - if ((i % 10 == 0) && (j == 3)) { - check_mappings(extmap_root, *t); - } - } - logger().debug("submitting transaction"); - submit_transaction(std::move(t)); - if (i % 50 == 0) { - check_mappings(extmap_root); - } - } - auto t = tm->create_transaction(); - int i = 0; - for (auto iter = test_ext_mappings.begin(); iter != test_ext_mappings.end();) { - auto [lo, ext] = *iter; - ++iter; - if (i % 3 != 0) { - rm_extent(extmap_root, *t, lo, ext); - } - i++; - - if (i % 10 == 0) { - logger().debug("submitting transaction i= {}", i); - submit_transaction(std::move(t)); - t = tm->create_transaction(); - } - if (i % 100 == 0) { - logger().debug("check_mappings i= {}", i); - check_mappings(extmap_root, *t); - check_mappings(extmap_root); - } - } - logger().debug("finally submitting transaction "); - submit_transaction(std::move(t)); - }); -} - -TEST_F(extentmap_manager_test_t, force_leafnode_split_merge_replay) -{ - run_async([this] { - extmap_root_t extmap_root(0, L_ADDR_NULL); - { - auto t = tm->create_transaction(); - extmap_root = extmap_manager->initialize_extmap(*t).unsafe_get0(); - submit_transaction(std::move(t)); - replay(); - } - uint32_t len = 4096; - uint32_t lo = 0; - for (unsigned i = 0; i < 50; i++) { - auto t = tm->create_transaction(); - logger().debug("opened split_merge transaction"); - for (unsigned j = 0; j < 5; ++j) { - [[maybe_unused]] auto addref = insert_extent(extmap_root, *t, lo, {lo, len}); - lo += len; - } - logger().debug("submitting transaction"); - 
submit_transaction(std::move(t)); - } - replay(); - auto t = tm->create_transaction(); - int i = 0; - for (auto iter = test_ext_mappings.begin(); iter != test_ext_mappings.end();) { - auto [lo, ext] = *iter; - ++iter; - rm_extent(extmap_root, *t, lo, ext); - i++; - - if (i % 10 == 0) { - logger().debug("submitting transaction i= {}", i); - submit_transaction(std::move(t)); - t = tm->create_transaction(); - } - if (i% 100 == 0){ - check_mappings(extmap_root); - } - } - logger().debug("finally submitting transaction "); - submit_transaction(std::move(t)); - replay(); - check_mappings(extmap_root); - }); -} diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc new file mode 100644 index 00000000000..45e941ea8d2 --- /dev/null +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -0,0 +1,300 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode.h" +#include "crimson/os/seastore/object_data_handler.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +class TestOnode : public Onode { + onode_layout_t layout; + bool dirty = false; + +public: + const onode_layout_t &get_layout() const final { + return layout; + } + onode_layout_t &get_mutable_layout(Transaction &t) final { + dirty = true; + return layout; + } + bool is_dirty() const { return dirty; } + ~TestOnode() final = default; +}; + +struct object_data_handler_test_t: + public seastar_test_suite_t, + TMTestState { + OnodeRef onode; + + bufferptr known_contents; + extent_len_t size = 0; + + object_data_handler_test_t() {} + + auto submit_transaction(TransactionRef &&t) { + return 
tm->submit_transaction(std::move(t) + ).safe_then([this] { + return segment_cleaner->run_until_halt(); + }); + } + + void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) { + ceph_assert(offset + len <= known_contents.length()); + size = std::max(size, offset + len); + memset( + known_contents.c_str() + offset, + fill, + len); + bufferlist bl; + bl.append( + bufferptr( + known_contents, + offset, + len)); + return ObjectDataHandler().write( + ObjectDataHandler::context_t{ + *tm, + t, + *onode, + }, + offset, + bl).unsafe_get0(); + } + void write(objaddr_t offset, extent_len_t len, char fill) { + auto t = tm->create_transaction(); + write(*t, offset, len, fill); + return submit_transaction(std::move(t)).unsafe_get0(); + } + + void truncate(Transaction &t, objaddr_t offset) { + if (size > offset) { + memset( + known_contents.c_str() + offset, + 0, + size - offset); + ObjectDataHandler().truncate( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset).unsafe_get0(); + } + size = offset; + } + void truncate(objaddr_t offset) { + auto t = tm->create_transaction(); + truncate(*t, offset); + return submit_transaction(std::move(t)).unsafe_get0(); + } + + void read(Transaction &t, objaddr_t offset, extent_len_t len) { + bufferlist bl = ObjectDataHandler().read( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset, + len).unsafe_get0(); + bufferlist known; + known.append( + bufferptr( + known_contents, + offset, + len)); + EXPECT_EQ(bl.length(), known.length()); + EXPECT_EQ(bl, known); + } + void read(objaddr_t offset, extent_len_t len) { + auto t = tm->create_transaction(); + read(*t, offset, len); + } + void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) { + auto fuzzes = std::vector{-1 * (int32_t)fuzz, 0, (int32_t)fuzz}; + for (auto left_fuzz : fuzzes) { + for (auto right_fuzz : fuzzes) { + read(offset + left_fuzz, len - left_fuzz + right_fuzz); + } + } + } + + seastar::future<> set_up_fut() final { + onode 
= new TestOnode{}; + known_contents = buffer::create(4<<20 /* 4MB */); + size = 0; + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + onode.reset(); + size = 0; + return tm_teardown(); + } +}; + +TEST_F(object_data_handler_test_t, single_write) +{ + run_async([this] { + write(1<<20, 8<<10, 'c'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, multi_write) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + write(1<<20, 4<<10, 'b'); + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, write_hole) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + // hole at 1<<20 + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_single) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_double) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20)+(4<<10), 4<<10, 'c'); + write((1<<20), 8<<10, 'b'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, overwrite_partial) +{ + run_async([this] { + write((1<<20), 12<<10, 'a'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(8<<10), 4<<10, 'b'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(4<<10), 4<<10, 'c'); + read_near(1<<20, 12<<10, 1); + + write((1<<20), 4<<10, 'd'); 
+ + read_near(1<<20, 12<<10, 1); + read_near(1<<20, 12<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, unaligned_write) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 512); + }); +} + +TEST_F(object_data_handler_test_t, unaligned_overwrite) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (128<<10) + (16<<10), 'x'); + + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 2<<10); + + read(base, (128<<10) + (16<<10)); + }); +} + +TEST_F(object_data_handler_test_t, truncate) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, 8<<10, 'a'); + write(base+(8<<10), 8<<10, 'b'); + write(base+(16<<10), 8<<10, 'c'); + + truncate(base + (32<<10)); + read(base, 64<<10); + + truncate(base + (24<<10)); + read(base, 64<<10); + + truncate(base + (12<<10)); + read(base, 64<<10); + + truncate(base - (12<<10)); + read(base, 64<<10); + }); +} diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc index 6dc9f4b5d1d..33837bdbb9e 100644 --- a/src/test/crimson/seastore/test_seastore.cc +++ b/src/test/crimson/seastore/test_seastore.cc @@ -59,6 +59,7 @@ struct seastore_test_t : const ghobject_t oid; std::map omap; + bufferlist contents; void set_omap( CTransaction &t, @@ -84,6 +85,88 @@ struct seastore_test_t : 
std::move(t)).get0(); } + void write( + SeaStore &seastore, + CTransaction &t, + uint64_t offset, + bufferlist bl) { + bufferlist new_contents; + if (offset > 0 && contents.length()) { + new_contents.substr_of( + contents, + 0, + std::min(offset, contents.length()) + ); + } + new_contents.append_zero(offset - new_contents.length()); + new_contents.append(bl); + + auto tail_offset = offset + bl.length(); + if (contents.length() > tail_offset) { + bufferlist tail; + tail.substr_of( + contents, + tail_offset, + contents.length() - tail_offset); + new_contents.append(tail); + } + contents.swap(new_contents); + + t.write( + cid, + oid, + offset, + bl.length(), + bl); + } + void write( + SeaStore &seastore, + uint64_t offset, + bufferlist bl) { + CTransaction t; + write(seastore, t, offset, bl); + seastore.do_transaction( + coll, + std::move(t)).get0(); + } + void write( + SeaStore &seastore, + uint64_t offset, + size_t len, + char fill) { + auto buffer = bufferptr(buffer::create(len)); + ::memset(buffer.c_str(), fill, len); + bufferlist bl; + bl.append(buffer); + write(seastore, offset, bl); + } + + void read( + SeaStore &seastore, + uint64_t offset, + uint64_t len) { + bufferlist to_check; + to_check.substr_of( + contents, + offset, + len); + auto ret = seastore.read( + coll, + oid, + offset, + len).unsafe_get0(); + EXPECT_EQ(ret.length(), to_check.length()); + EXPECT_EQ(ret, to_check); + } + + void check_size(SeaStore &seastore) { + auto st = seastore.stat( + coll, + oid).get0(); + EXPECT_EQ(contents.length(), st.st_size); + } + + void check_omap_key( SeaStore &seastore, const string &key) { @@ -241,3 +324,21 @@ TEST_F(seastore_test_t, omap_test_iterator) test_obj.check_omap(*seastore); }); } + + +TEST_F(seastore_test_t, simple_extent_test) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write( + *seastore, + 1024, + 1024, + 'a'); + test_obj.read( + *seastore, + 1024, + 1024); + test_obj.check_size(*seastore); + }); +} diff --git 
a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 745efae1ef6..93a8b222a3f 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -424,13 +424,9 @@ struct transaction_manager_test_t : ceph_assert(test_mappings.contains(addr, t.mapping_delta)); ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len); - auto ret_list = tm->read_extents( + auto ext = tm->read_extent( *t.t, addr, len ).unsafe_get0(); - EXPECT_EQ(ret_list.size(), 1); - auto &ext = ret_list.begin()->second; - auto &laddr = ret_list.begin()->first; - EXPECT_EQ(addr, laddr); EXPECT_EQ(addr, ext->get_laddr()); return ext; }