Merge pull request #40846 from athanatos/sjust/wip-object-data

seastore: initial object data support

Reviewed-by: Chunmei Liu <chunmei.liu@intel.com>
This commit is contained in:
Samuel Just 2021-04-19 23:50:22 -07:00 committed by GitHub
commit 5c1ae5bcd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 1441 additions and 1529 deletions

View File

@ -27,12 +27,10 @@ add_library(crimson-seastore STATIC
onode_manager/staged-fltree/super.cc
onode_manager/staged-fltree/value.cc
onode_manager/staged-fltree/fltree_onode_manager.cc
extentmap_manager.cc
extentmap_manager/btree/extentmap_btree_node_impl.cc
extentmap_manager/btree/btree_extentmap_manager.cc
collection_manager.cc
collection_manager/flat_collection_manager.cc
collection_manager/collection_flat_node.cc
object_data_handler.cc
seastore.cc
../../../test/crimson/seastore/test_block.cc
${PROJECT_SOURCE_DIR}/src/os/Transaction.cc

View File

@ -6,9 +6,9 @@
// included for get_extent_by_type
#include "crimson/os/seastore/collection_manager/collection_flat_node.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
#include "crimson/os/seastore/object_data_handler.h"
#include "crimson/os/seastore/collection_manager/collection_flat_node.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h"
#include "test/crimson/seastore/test_block.h"
@ -144,16 +144,14 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length);
case extent_types_t::ONODE_BLOCK_STAGED:
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length);
case extent_types_t::EXTMAP_INNER:
return alloc_new_extent<extentmap_manager::ExtMapInnerNode>(t, length);
case extent_types_t::EXTMAP_LEAF:
return alloc_new_extent<extentmap_manager::ExtMapLeafNode>(t, length);
case extent_types_t::OMAP_INNER:
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length);
case extent_types_t::OMAP_LEAF:
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length);
case extent_types_t::COLL_BLOCK:
return alloc_new_extent<collection_manager::CollectionNode>(t, length);
case extent_types_t::OBJECT_DATA_BLOCK:
return alloc_new_extent<ObjectDataBlock>(t, length);
case extent_types_t::TEST_BLOCK:
return alloc_new_extent<TestBlock>(t, length);
case extent_types_t::TEST_BLOCK_PHYSICAL:
@ -529,16 +527,6 @@ Cache::get_extent_ertr::future<CachedExtentRef> Cache::get_extent_by_type(
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::EXTMAP_INNER:
return get_extent<extentmap_manager::ExtMapInnerNode>(offset, length
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::EXTMAP_LEAF:
return get_extent<extentmap_manager::ExtMapLeafNode>(offset, length
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::OMAP_INNER:
return get_extent<omap_manager::OMapInnerNode>(offset, length
).safe_then([](auto extent) {
@ -559,6 +547,11 @@ Cache::get_extent_ertr::future<CachedExtentRef> Cache::get_extent_by_type(
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::OBJECT_DATA_BLOCK:
return get_extent<ObjectDataBlock>(offset, length
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
case extent_types_t::TEST_BLOCK:
return get_extent<TestBlock>(offset, length
).safe_then([](auto extent) {

View File

@ -173,7 +173,10 @@ public:
std::move(ref));
},
get_extent_ertr::pass_further{},
crimson::ct_error::discard_all{});
crimson::ct_error::assert_all{
"Cache::get_extent: invalid error"
}
);
}
}

View File

@ -47,13 +47,11 @@ FlatCollectionManager::get_coll_root(const coll_root_t &coll_root, Transaction &
logger().debug("FlatCollectionManager: {}", __func__);
assert(coll_root.get_location() != L_ADDR_NULL);
auto cc = get_coll_context(t);
return cc.tm.read_extents<CollectionNode>(
return cc.tm.read_extent<CollectionNode>(
cc.t,
coll_root.get_location(),
coll_root.get_size()
).safe_then([](auto&& extents) {
assert(extents.size() == 1);
[[maybe_unused]] auto [laddr, e] = extents.front();
).safe_then([](auto&& e) {
return get_root_ertr::make_ready_future<CollectionNodeRef>(std::move(e));
});
}

View File

@ -8,7 +8,8 @@
#include "crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h"
namespace crimson::os::seastore::extentmap_manager {
ExtentMapManagerRef create_extentmap_manager(TransactionManager &trans_manager) {
ExtentMapManagerRef create_extentmap_manager(
TransactionManager &trans_manager) {
return ExtentMapManagerRef(new BtreeExtentMapManager(trans_manager));
}

View File

@ -1,124 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include <iosfwd>
#include <list>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
#include <seastar/core/future.hh>
#include "crimson/osd/exceptions.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/transaction_manager.h"
#define PAGE_SIZE 4096
#define EXTMAP_BLOCK_SIZE 4096
namespace crimson::os::seastore {
struct lext_map_val_t {
laddr_t laddr;
extent_len_t length = 0;
lext_map_val_t(
laddr_t laddr,
extent_len_t length)
: laddr(laddr), length(length) {}
};
/**
 * extent_mapping_t
 *
 * One (object offset -> laddr, length) mapping as returned by the
 * extent map lookup/insert interfaces.
 */
class extent_mapping_t
{
public:
  objaddr_t logical_offset = 0;  // offset in object
  // lextent start address aligned with block size; L_ADDR_NULL until set so
  // that the single-argument constructor never leaves it indeterminate
  laddr_t laddr = L_ADDR_NULL;
  extent_len_t length = 0;

  /// Mapping that only carries an object offset (no backing extent yet).
  explicit extent_mapping_t(objaddr_t lo) : logical_offset(lo) {}

  /// Fully specified mapping.
  extent_mapping_t(
    objaddr_t lo,
    laddr_t laddr,
    extent_len_t length)
    : logical_offset(lo), laddr(laddr), length(length) {}
};
// State tag carried by extmap_root_t.
enum class extmap_root_state_t : uint8_t {
// value assigned by the extmap_root_t constructor
INITIAL = 0,
// value assigned by BtreeExtentMapManager::insert_lextent when it installs a new root node
MUTATED = 1,
// NOTE(review): not referenced in the code visible here; presumably "no/unknown state" — confirm
NONE = 0xFF
};
using extent_map_list_t = std::list<extent_mapping_t>;
std::ostream &operator<<(std::ostream &out, const extent_mapping_t &rhs);
std::ostream &operator<<(std::ostream &out, const extent_map_list_t &rhs);
/// Descriptor of an extent map tree root: tree depth, mutation state and
/// the logical address of the root node.
struct extmap_root_t {
  depth_t depth = 0;
  extmap_root_state_t state;
  laddr_t extmap_root_laddr;

  /// A freshly constructed root always starts in the INITIAL state.
  extmap_root_t(depth_t dep, laddr_t laddr)
    : depth(dep),
      state(extmap_root_state_t::INITIAL),
      extmap_root_laddr(laddr)
  {}
};
/**
* Abstract interface for managing the mapping from offsets inside an object
* to logical addresses. Each onode has its own extent map tree, identified
* by an extmap_root_t.
*/
class ExtentMapManager {
public:
/* initialize_extmap
*
* Set up a new extent map and return the extmap_root_t describing it.
*/
using initialize_extmap_ertr = TransactionManager::alloc_extent_ertr;
using initialize_extmap_ret = initialize_extmap_ertr::future<extmap_root_t>;
virtual initialize_extmap_ret initialize_extmap(Transaction &t) = 0;
/* find_lextent
*
* Return a list of all extent_mapping_t overlapping any portion of lo~len.
* If no mapping overlaps the range, the next mapping after the range is
* returned instead.
*/
using find_lextent_ertr = TransactionManager::read_extent_ertr;
using find_lextent_ret = find_lextent_ertr::future<extent_map_list_t>;
virtual find_lextent_ret
find_lextent(const extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, extent_len_t len) = 0;
/* add_lextent
*
* Add a new mapping (object offset -> laddr, length) to the extent map.
* Returns the added extent_mapping_t. extmap_root is taken by non-const
* reference because the implementation may update it.
*/
using add_lextent_ertr = TransactionManager::read_extent_ertr;
using add_lextent_ret = add_lextent_ertr::future<extent_mapping_t>;
virtual add_lextent_ret
add_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) = 0;
/* rm_lextent
*
* Remove an existing extent mapping from the extent map.
* Returns true if the mapping was removed, false otherwise.
*/
using rm_lextent_ertr = TransactionManager::read_extent_ertr;
using rm_lextent_ret = rm_lextent_ertr::future<bool>;
virtual rm_lextent_ret rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) = 0;
virtual ~ExtentMapManager() {}
};
using ExtentMapManagerRef = std::unique_ptr<ExtentMapManager>;
namespace extentmap_manager {
/* Create an ExtentMapManager for an extent map.
* For a new extent map, call initialize_extmap after create_extentmap_manager
* to initialize the extent map before using it.
* For an existing extent map, initialize_extmap does not need to be called.
*/
ExtentMapManagerRef create_extentmap_manager(
TransactionManager &trans_manager);
}
}

View File

@ -1,118 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include <sys/mman.h>
#include <string.h>
#include "crimson/common/log.h"
#include "include/buffer.h"
#include "crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
namespace {
  // File-local shortcut to the logger registered for ceph_subsys_filestore.
  seastar::logger& logger() {
    seastar::logger &subsys_log = crimson::get_logger(ceph_subsys_filestore);
    return subsys_log;
  }
}
namespace crimson::os::seastore::extentmap_manager {
// Keeps a reference to the transaction manager; no other state is set up here.
BtreeExtentMapManager::BtreeExtentMapManager(TransactionManager &tm)
  : tm(tm)
{
}
// Allocates one leaf node as the root of a new extent map, marks it empty
// with depth 1, and returns the matching extmap_root_t.
BtreeExtentMapManager::initialize_extmap_ret
BtreeExtentMapManager::initialize_extmap(Transaction &t)
{
  logger().debug("{}", __func__);
  return tm.alloc_extent<ExtMapLeafNode>(t, L_ADDR_MIN, EXTMAP_BLOCK_SIZE)
    .safe_then([](auto&& leaf) {
      leaf->set_size(0);
      extmap_node_meta_t leaf_meta{1};
      leaf->set_meta(leaf_meta);
      return initialize_extmap_ertr::make_ready_future<extmap_root_t>(
        extmap_root_t(1, leaf->get_laddr()));
    });
}
// Loads the root node recorded in extmap_root; the root's depth selects
// which node type extmap_load_extent reads.
BtreeExtentMapManager::get_root_ret
BtreeExtentMapManager::get_extmap_root(const extmap_root_t &extmap_root, Transaction &t)
{
  assert(extmap_root.extmap_root_laddr != L_ADDR_NULL);
  return extmap_load_extent(
    get_ext_context(t),
    extmap_root.extmap_root_laddr,
    extmap_root.depth);
}
// Looks up the mappings for lo~len by loading the root node and delegating
// to its find_lextent; the resulting list is logged and passed through.
BtreeExtentMapManager::find_lextent_ret
BtreeExtentMapManager::find_lextent(const extmap_root_t &extmap_root, Transaction &t,
                                    objaddr_t lo, extent_len_t len)
{
  logger().debug("{}: {}, {}", __func__, lo, len);
  return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, len](auto&& extent) {
    return extent->find_lextent(get_ext_context(t), lo, len);
  }).safe_then([](auto &&e) {
    // __func__ inside a lambda names the closure's operator(), so the
    // method name is spelled out to keep the log line attributable.
    logger().debug("find_lextent: found_lextent {}", e);
    return find_lextent_ret(
      find_lextent_ertr::ready_future_marker{},
      std::move(e));
  });
}
// Adds the mapping lo -> val by loading the root node and handing it to
// insert_lextent, which may replace the root and update extmap_root.
// extmap_root and t are captured by reference and must outlive the
// returned future.
BtreeExtentMapManager::add_lextent_ret
BtreeExtentMapManager::add_lextent(extmap_root_t &extmap_root, Transaction &t,
                                   objaddr_t lo, lext_map_val_t val)
{
  logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length);
  return get_extmap_root(extmap_root, t).safe_then([this, &extmap_root, &t, lo, val](auto &&root) {
    return insert_lextent(extmap_root, t, root, lo, val);
  }).safe_then([](auto ret) {
    // __func__ inside a lambda names the closure's operator(), so the
    // method name is spelled out to keep the log line attributable.
    logger().debug("add_lextent: {}", ret);
    return add_lextent_ret(
      add_lextent_ertr::ready_future_marker{},
      std::move(ret));
  });
}
// Inserts logical_offset -> val below `root`.
// If the current root is full, a new inner node is allocated first, made the
// parent of the old root, recorded in extmap_root (laddr, depth, MUTATED),
// and the old root is split under it. The insert then continues on whichever
// node the split step yields (or on the original root when no split happened).
BtreeExtentMapManager::insert_lextent_ret
BtreeExtentMapManager::insert_lextent(extmap_root_t &extmap_root, Transaction &t,
ExtMapNodeRef root, objaddr_t logical_offset, lext_map_val_t val)
{
// default: no split needed, continue with the existing root
auto split = insert_lextent_ertr::make_ready_future<ExtMapNodeRef>(root);
if (root->at_max_capacity()) {
logger().debug("{}::splitting root {}", __func__, *root);
split = root->extmap_alloc_extent<ExtMapInnerNode>(get_ext_context(t), EXTMAP_BLOCK_SIZE)
.safe_then([this, &extmap_root, root, &t, logical_offset](auto&& nroot) {
// the new root sits one level above the old root
extmap_node_meta_t meta{root->get_node_meta().depth + 1};
nroot->set_meta(meta);
// first entry of the new root points at the old root
nroot->journal_insert(nroot->begin(), OBJ_ADDR_MIN,
root->get_laddr(), nullptr);
// publish the new root to the caller-owned descriptor
extmap_root.extmap_root_laddr = nroot->get_laddr();
extmap_root.depth = root->get_node_meta().depth + 1;
extmap_root.state = extmap_root_state_t::MUTATED;
return nroot->split_entry(get_ext_context(t), logical_offset, nroot->begin(), root);
});
}
return split.safe_then([this, &t, logical_offset, val](ExtMapNodeRef node) {
return node->insert(get_ext_context(t), logical_offset, val);
});
}
// Removes the mapping lo -> val by loading the root node and delegating to
// its rm_lextent; resolves to true when a mapping was removed.
BtreeExtentMapManager::rm_lextent_ret
BtreeExtentMapManager::rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val)
{
  logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length);
  return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, val](auto extent) {
    return extent->rm_lextent(get_ext_context(t), lo, val);
  }).safe_then([](auto removed) {
    // __func__ inside a lambda names the closure's operator(), so the
    // method name is spelled out to keep the log line attributable.
    logger().debug("rm_lextent: {}", removed);
    return rm_lextent_ret(
      rm_lextent_ertr::ready_future_marker{},
      removed);
  });
}
}

View File

@ -1,64 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include <iostream>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
#include <seastar/core/future.hh>
#include "include/ceph_assert.h"
#include "crimson/osd/exceptions.h"
#include "crimson/os/seastore/extentmap_manager.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/transaction_manager.h"
namespace crimson::os::seastore::extentmap_manager {
/**
* BtreeExtentMapManager
*
* ExtentMapManager implementation that uses a btree to track the
* objaddr_t -> laddr_t mapping of each onode's extent map.
*/
class BtreeExtentMapManager : public ExtentMapManager {
// transaction manager used for every extent read/allocation
TransactionManager &tm;
// bundle tm with the given transaction for the node-level calls
ext_context_t get_ext_context(Transaction &t) {
return ext_context_t{tm,t};
}
/* get_extmap_root
*
* Load the root node of the extent map tree described by extmap_root.
*/
using get_root_ertr = TransactionManager::read_extent_ertr;
using get_root_ret = get_root_ertr::future<ExtMapNodeRef>;
get_root_ret get_extmap_root(const extmap_root_t &extmap_root, Transaction &t);
/* insert_lextent
*
* Insert lo -> val starting from the given root node; may replace the
* root and update extmap_root when the root is full.
*/
using insert_lextent_ertr = TransactionManager::read_extent_ertr;
using insert_lextent_ret = insert_lextent_ertr::future<extent_mapping_t >;
insert_lextent_ret insert_lextent(extmap_root_t &extmap_root, Transaction &t,
ExtMapNodeRef extent, objaddr_t lo,
lext_map_val_t val);
public:
explicit BtreeExtentMapManager(TransactionManager &tm);
// ExtentMapManager interface
initialize_extmap_ret initialize_extmap(Transaction &t) final;
find_lextent_ret find_lextent(const extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, extent_len_t len) final;
add_lextent_ret add_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) final;
rm_lextent_ret rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) final;
};
using BtreeExtentMapManagerRef = std::unique_ptr<BtreeExtentMapManager>;
}

View File

@ -1,143 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include <boost/iterator/counting_iterator.hpp>
#include "crimson/common/log.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/extentmap_manager.h"
namespace crimson::os::seastore::extentmap_manager{
// Pairs the transaction manager with the transaction an operation runs in,
// so node methods can take both as one by-value argument.
// Both members are references: the referenced objects must outlive any copy.
struct ext_context_t {
TransactionManager &tm;
Transaction &t;
};
struct extmap_node_meta_t {
depth_t depth = 0;
std::pair<extmap_node_meta_t, extmap_node_meta_t> split_into(objaddr_t pivot) const {
return std::make_pair(
extmap_node_meta_t{depth},
extmap_node_meta_t{depth});
}
static extmap_node_meta_t merge_from(
const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs) {
assert(lhs.depth == rhs.depth);
return extmap_node_meta_t{lhs.depth};
}
static std::pair<extmap_node_meta_t, extmap_node_meta_t>
rebalance(const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs, laddr_t pivot) {
assert(lhs.depth == rhs.depth);
return std::make_pair(
extmap_node_meta_t{lhs.depth},
extmap_node_meta_t{lhs.depth});
}
};
// Common base of the extent map btree nodes. Declares the operations every
// node must implement (lookup, insert, remove, split/merge/balance) and
// provides helpers for allocating and retiring node extents.
struct ExtMapNode : LogicalCachedExtent {
using ExtMapNodeRef = TCachedExtentRef<ExtMapNode>;
ExtMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {}
ExtMapNode(const ExtMapNode &other)
: LogicalCachedExtent(other) {}
// Collect the mappings for lo~len found at or below this node.
using find_lextent_ertr = ExtentMapManager::find_lextent_ertr;
using find_lextent_ret = ExtentMapManager::find_lextent_ret;
virtual find_lextent_ret find_lextent(ext_context_t ec,
objaddr_t lo, extent_len_t len) = 0;
// Insert lo -> val at or below this node; resolves to the inserted mapping.
using insert_ertr = TransactionManager::read_extent_ertr;
using insert_ret = insert_ertr::future<extent_mapping_t>;
virtual insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0;
// Remove lo -> val at or below this node; resolves to whether it was removed.
using rm_lextent_ertr = TransactionManager::read_extent_ertr;
using rm_lextent_ret = rm_lextent_ertr::future<bool>;
virtual rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0;
// Split this node into two new nodes; resolves to (left, right, pivot).
using split_children_ertr = TransactionManager::alloc_extent_ertr;
using split_children_ret = split_children_ertr::future
<std::tuple<ExtMapNodeRef, ExtMapNodeRef, uint32_t>>;
virtual split_children_ret make_split_children(ext_context_t ec) = 0;
// Merge this node and `right` into one new node.
using full_merge_ertr = TransactionManager::alloc_extent_ertr;
using full_merge_ret = full_merge_ertr::future<ExtMapNodeRef>;
virtual full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) = 0;
// Redistribute this node and `right` into two new nodes; resolves to
// (left, right, pivot).
using make_balanced_ertr = TransactionManager::alloc_extent_ertr;
using make_balanced_ret = make_balanced_ertr::future
<std::tuple<ExtMapNodeRef, ExtMapNodeRef, uint32_t>>;
virtual make_balanced_ret
make_balanced(ext_context_t ec, ExtMapNodeRef right, bool prefer_left) = 0;
virtual extmap_node_meta_t get_node_meta() const = 0;
virtual bool at_max_capacity() const = 0;
virtual bool at_min_capacity() const = 0;
virtual unsigned get_node_size() const = 0;
virtual ~ExtMapNode() = default;
using alloc_ertr = TransactionManager::alloc_extent_ertr;
// Allocate one extent of node type T and length len (hint L_ADDR_MIN).
template<class T>
alloc_ertr::future<TCachedExtentRef<T>>
extmap_alloc_extent(ext_context_t ec, extent_len_t len) {
return ec.tm.alloc_extent<T>(ec.t, L_ADDR_MIN, len).safe_then(
[](auto&& extent) {
return alloc_ertr::make_ready_future<TCachedExtentRef<T>>(std::move(extent));
});
}
// Allocate two extents of node type T, one after the other, and return
// them as a pair (first allocation -> .first, second -> .second).
template<class T>
alloc_ertr::future<std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>>
extmap_alloc_2extents(ext_context_t ec, extent_len_t len) {
// do_with keeps the result pair alive until the whole chain has resolved
return seastar::do_with(std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>(),
[ec, len] (auto &extents) {
return crimson::do_for_each(boost::make_counting_iterator(0),
boost::make_counting_iterator(2),
[ec, len, &extents] (auto i) {
return ec.tm.alloc_extent<T>(ec.t, L_ADDR_MIN, len).safe_then(
[i, &extents](auto &&node) {
if (i == 0)
extents.first = node;
if (i == 1)
extents.second = node;
});
}).safe_then([&extents] {
return alloc_ertr::make_ready_future
<std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>>(std::move(extents));
});
});
}
// Drop one reference on each address in dec_laddrs, in list order, and
// return the values reported by dec_ref in the same order.
using retire_ertr = TransactionManager::ref_ertr;
using retire_ret = retire_ertr::future<std::list<unsigned>>;
retire_ret
extmap_retire_node(ext_context_t ec, std::list<laddr_t> dec_laddrs) {
// do_with keeps both lists alive until the whole chain has resolved
return seastar::do_with(std::move(dec_laddrs), std::list<unsigned>(),
[ec] (auto &&dec_laddrs, auto &refcnt) {
return crimson::do_for_each(dec_laddrs.begin(), dec_laddrs.end(),
[ec, &refcnt] (auto &laddr) {
return ec.tm.dec_ref(ec.t, laddr).safe_then([&refcnt] (auto ref) {
refcnt.push_back(ref);
});
}).safe_then([&refcnt] {
return retire_ertr::make_ready_future<std::list<unsigned>>(std::move(refcnt));
});
});
}
};
using ExtMapNodeRef = ExtMapNode::ExtMapNodeRef;
TransactionManager::read_extent_ertr::future<ExtMapNodeRef>
extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth);
}

View File

@ -1,373 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include <sys/mman.h>
#include <string.h>
#include <memory>
#include <string.h>
#include "include/buffer.h"
#include "include/byteorder.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
namespace {
  // File-local shortcut to the logger registered for ceph_subsys_filestore.
  seastar::logger& logger() {
    seastar::logger &subsys_log = crimson::get_logger(ceph_subsys_filestore);
    return subsys_log;
  }
}
namespace crimson::os::seastore::extentmap_manager {
// Appends this node's entry count and depth to the stream.
std::ostream &ExtMapInnerNode::print_detail_l(std::ostream &out) const
{
  out << ", size=" << get_size();
  out << ", depth=" << get_meta().depth;
  return out;
}
// Collects the mappings for lo~len from the children selected by
// bound(lo, lo + len): each selected child is loaded (one level below this
// node) and its own find_lextent result is appended to a shared list.
ExtMapInnerNode::find_lextent_ret
ExtMapInnerNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len)
{
auto [begin, end] = bound(lo, lo + len);
// The result list lives on the heap so the per-child continuations can hold
// a reference to it; ownership moves into the final continuation below.
auto result_up = std::make_unique<extent_map_list_t>();
auto &result = *result_up;
return crimson::do_for_each(
std::move(begin),
std::move(end),
[this, ec, &result, lo, len](const auto &val) mutable {
return extmap_load_extent(ec, val.get_val(), get_meta().depth - 1).safe_then(
[ec, &result, lo, len](auto extent) mutable {
return extent->find_lextent(ec, lo, len).safe_then(
[&result](auto item_list) mutable {
// move the child's entries to the end of the accumulated list
result.splice(result.end(), item_list,
item_list.begin(), item_list.end());
});
});
}).safe_then([result=std::move(result_up)] {
return find_lextent_ret(
find_lextent_ertr::ready_future_marker{},
std::move(*result));
});
}
// Inserts lo -> val into the subtree below this node: the child containing
// lo is loaded, split first if it is already full, and the insert is then
// forwarded to the resulting child.
ExtMapInnerNode::insert_ret
ExtMapInnerNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
{
auto insertion_pt = get_containing_child(lo);
assert(insertion_pt != end());
return extmap_load_extent(ec, insertion_pt->get_val(), get_meta().depth - 1).safe_then(
[this, ec, insertion_pt, lo, val=std::move(val)](auto extent) mutable {
// a full child is split before descending; split_entry yields the half
// that should receive lo
return extent->at_max_capacity() ?
split_entry(ec, lo, insertion_pt, extent) :
insert_ertr::make_ready_future<ExtMapNodeRef>(std::move(extent));
}).safe_then([ec, lo, val=std::move(val)](ExtMapNodeRef extent) mutable {
return extent->insert(ec, lo, val);
});
}
// Removes lo -> val from the subtree below this node: the child containing
// lo is loaded, merged/rebalanced with a sibling first if it is at minimum
// capacity, and the removal is then forwarded to the resulting child.
ExtMapInnerNode::rm_lextent_ret
ExtMapInnerNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
{
auto rm_pt = get_containing_child(lo);
return extmap_load_extent(ec, rm_pt->get_val(), get_meta().depth - 1).safe_then(
[this, ec, rm_pt, lo, val=std::move(val)](auto extent) mutable {
// merging needs a sibling, hence the get_node_size() > 1 condition
if (extent->at_min_capacity() && get_node_size() > 1) {
return merge_entry(ec, lo, rm_pt, extent);
} else {
return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(std::move(extent));
}
}).safe_then([ec, lo, val](ExtMapNodeRef extent) mutable {
return extent->rm_lextent(ec, lo, val);
});
}
// Allocates two new inner nodes, distributes this node's entries over them
// with split_into, and resolves to (left, right, pivot).
ExtMapInnerNode::split_children_ret
ExtMapInnerNode::make_split_children(ext_context_t ec)
{
  logger().debug("{}: {}", "ExtMapInnerNode", __func__);
  return extmap_alloc_2extents<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this] (auto &&fresh) {
      auto lhs = fresh.first;
      auto rhs = fresh.second;
      auto pivot = split_into(*lhs, *rhs);
      return split_children_ret(
        split_children_ertr::ready_future_marker{},
        std::make_tuple(lhs, rhs, pivot));
    });
}
// Allocates one new inner node and fills it with the merged contents of
// this node and `right`.
ExtMapInnerNode::full_merge_ret
ExtMapInnerNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right)
{
  logger().debug("{}: {}", "ExtMapInnerNode", __func__);
  return extmap_alloc_extent<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this, right] (auto &&merged) {
      auto rhs = right->cast<ExtMapInnerNode>();
      merged->merge_from(*this, *rhs);
      return full_merge_ret(
        full_merge_ertr::ready_future_marker{},
        std::move(merged));
    });
}
// Allocates two new inner nodes and redistributes the entries of this node
// and `_right` over them; resolves to (left, right, pivot).
ExtMapInnerNode::make_balanced_ret
ExtMapInnerNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left)
{
  logger().debug("{}: {}", "ExtMapInnerNode", __func__);
  ceph_assert(_right->get_type() == type);
  return extmap_alloc_2extents<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this, _right, prefer_left] (auto &&fresh) {
      auto new_left = fresh.first;
      auto new_right = fresh.second;
      auto &rhs = *_right->cast<ExtMapInnerNode>();
      auto pivot = balance_into_new_nodes(
        *this, rhs, prefer_left, *new_left, *new_right);
      return make_balanced_ret(
        make_balanced_ertr::ready_future_marker{},
        std::make_tuple(new_left, new_right, pivot));
    });
}
// Splits the child `entry` referenced by `iter` into two new nodes, rewires
// this node to point at them, releases the old child, and resolves to the
// half that lo belongs to (left when pivot > lo, otherwise right).
ExtMapInnerNode::split_entry_ret
ExtMapInnerNode::split_entry(ext_context_t ec, objaddr_t lo,
internal_iterator_t iter, ExtMapNodeRef entry)
{
logger().debug("{}: {}", "ExtMapInnerNode", __func__);
if (!is_pending()) {
// this node is about to be modified: obtain the mutable version and
// re-run on it, re-deriving the iterator from its offset
auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapInnerNode>();
auto mut_iter = mut->iter_idx(iter->get_offset());
return mut->split_entry(ec, lo, mut_iter, entry);
}
// one more entry is inserted below, so this node must have room
ceph_assert(!at_max_capacity());
return entry->make_split_children(ec)
.safe_then([this, ec, lo, iter, entry] (auto tuple){
auto [left, right, pivot] = tuple;
// existing slot now points at the left half; right half goes in after it
journal_update(iter, left->get_laddr(), maybe_get_delta_buffer());
journal_insert(iter + 1, pivot, right->get_laddr(), maybe_get_delta_buffer());
logger().debug(
"ExtMapInnerNode::split_entry *this {} entry {} into left {} right {}",
*this, *entry, *left, *right);
//retire extent
return ec.tm.dec_ref(ec.t, entry->get_laddr())
.safe_then([lo, left = left, right = right, pivot = pivot] (auto ret) {
return split_entry_ertr::make_ready_future<ExtMapNodeRef>(
pivot > lo ? left : right);
});
});
}
// Combines the child `entry` referenced by `iter` with a neighbouring child
// (the "donor"). If the donor is at minimum capacity the two are fully
// merged into one new node; otherwise their entries are rebalanced into two
// new nodes. The old nodes are released and the node that lo belongs to is
// returned.
ExtMapInnerNode::merge_entry_ret
ExtMapInnerNode::merge_entry(ext_context_t ec, objaddr_t lo,
internal_iterator_t iter, ExtMapNodeRef entry)
{
if (!is_pending()) {
// this node is about to be modified: obtain the mutable version and
// re-run on it, re-deriving the iterator from its offset
auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapInnerNode>();
auto mut_iter = mut->iter_idx(iter->get_offset());
return mut->merge_entry(ec, lo, mut_iter, entry);
}
logger().debug("ExtMapInnerNode: merge_entry: {}, {}", *this, *entry);
// is_left: the donor is the left neighbour (entry is the last child);
// otherwise the donor is the right neighbour
auto is_left = (iter + 1) == end();
auto donor_iter = is_left ? iter - 1 : iter + 1;
return extmap_load_extent(ec, donor_iter->get_val(), get_meta().depth - 1)
.safe_then([this, ec, lo, iter, entry, donor_iter, is_left]
(auto &&donor) mutable {
// order the two nodes and their slots as (left, right)
auto [l, r] = is_left ?
std::make_pair(donor, entry) : std::make_pair(entry, donor);
auto [liter, riter] = is_left ?
std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
if (donor->at_min_capacity()) {
return l->make_full_merge(ec, r)
.safe_then([this, ec, entry, l = l, r = r, liter = liter, riter = riter]
(auto &&replacement){
// left slot points at the merged node, right slot disappears
journal_update(liter, replacement->get_laddr(), maybe_get_delta_buffer());
journal_remove(riter, maybe_get_delta_buffer());
//retire extent
std::list<laddr_t> dec_laddrs;
dec_laddrs.push_back(l->get_laddr());
dec_laddrs.push_back(r->get_laddr());
return extmap_retire_node(ec, dec_laddrs)
.safe_then([replacement] (auto &&ret) {
return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(replacement);
});
});
} else {
logger().debug("ExtMapInnerNode::merge_entry balanced l {} r {}",
*l, *r);
return l->make_balanced(ec, r, !is_left)
.safe_then([this, ec, lo, entry, l = l, r = r, liter = liter, riter = riter]
(auto tuple) {
auto [replacement_l, replacement_r, pivot] = tuple;
// both slots are repointed; the right slot also gets the new pivot key
journal_update(liter, replacement_l->get_laddr(), maybe_get_delta_buffer());
journal_replace(riter, pivot, replacement_r->get_laddr(),
maybe_get_delta_buffer());
// retire extent
std::list<laddr_t> dec_laddrs;
dec_laddrs.push_back(l->get_laddr());
dec_laddrs.push_back(r->get_laddr());
return extmap_retire_node(ec, dec_laddrs)
.safe_then([lo, pivot = pivot, replacement_l = replacement_l, replacement_r = replacement_r]
(auto &&ret) {
// pick the replacement whose key range contains lo
return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(
lo >= pivot ? replacement_r : replacement_l);
});
});
}
});
}
// Returns the iterator of the first child entry for which contains(lo)
// holds; asserts if no entry matches.
ExtMapInnerNode::internal_iterator_t
ExtMapInnerNode::get_containing_child(objaddr_t lo)
{
  // TODO: binary search
  auto child = begin();
  while (child != end()) {
    if (child.contains(lo)) {
      return child;
    }
    ++child;
  }
  ceph_assert(0 == "invalid");
  return end();
}
// Appends this node's entry count and depth to the stream.
std::ostream &ExtMapLeafNode::print_detail_l(std::ostream &out) const
{
  out << ", size=" << get_size();
  out << ", depth=" << get_meta().depth;
  return out;
}
// Returns the leaf entries selected by get_leaf_entries(lo, len) as
// extent_mapping_t values. When the selection is empty but an entry follows
// it, that single following entry is returned instead.
ExtMapLeafNode::find_lextent_ret
ExtMapLeafNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len)
{
  logger().debug(
    "ExtMapLeafNode::find_lextent {}~{}", lo, len);
  extent_map_list_t found;
  auto [cur, last] = get_leaf_entries(lo, len);
  if (cur == last && last != end()) {
    // nothing in range: widen by one so the next entry is reported
    ++last;
  }
  while (cur != last) {
    auto mapped = cur->get_val();
    found.emplace_back(cur->get_key(), mapped.laddr, mapped.length);
    logger().debug("ExtMapLeafNode::find_lextent find {}~{}", lo, mapped.laddr);
    ++cur;
  }
  return find_lextent_ertr::make_ready_future<extent_map_list_t>(
    std::move(found));
}
// Inserts lo -> val into this leaf, which must not be full. A node that is
// not yet pending forwards the call to its mutable version.
ExtMapLeafNode::insert_ret
ExtMapLeafNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
{
  ceph_assert(!at_max_capacity());
  if (is_pending()) {
    auto slot = lower_bound(lo);
    journal_insert(slot, lo, val, maybe_get_delta_buffer());
    logger().debug(
      "ExtMapLeafNode::insert: inserted {}->{} {}",
      slot.get_key(),
      slot.get_val().laddr,
      slot.get_val().length);
    return insert_ertr::make_ready_future<extent_mapping_t>(
      extent_mapping_t(lo, val.laddr, val.length));
  }

  auto writable = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapLeafNode>();
  return writable->insert(ec, lo, val);
}
// Removes the entry lo -> val from this leaf.
// Resolves to true only if an entry with exactly this key, laddr and length
// exists and was removed; otherwise nothing is changed and false is
// returned. A node that is not yet pending forwards the call to its mutable
// version.
ExtMapLeafNode::rm_lextent_ret
ExtMapLeafNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
{
  if (!is_pending()) {
    auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapLeafNode>();
    return mut->rm_lextent(ec, lo, val);
  }

  auto rm_pt = get_leaf_entries(lo, val.length).first;
  // The lookup may come back as end() (e.g. nothing at or after lo); end()
  // must not be dereferenced, so treat that as "no such mapping".
  const bool matches =
    rm_pt != end() &&
    lo == rm_pt->get_key() &&
    val.laddr == rm_pt->get_val().laddr &&
    val.length == rm_pt->get_val().length;
  if (!matches) {
    return rm_lextent_ertr::make_ready_future<bool>(false);
  }

  journal_remove(rm_pt, maybe_get_delta_buffer());
  // Log from lo/val (equal to the removed entry by the match above) rather
  // than through rm_pt, which no longer designates the removed entry.
  logger().debug(
    "ExtMapLeafNode::rm_lextent: removed {}->{} {}",
    lo,
    val.laddr,
    val.length);
  return rm_lextent_ertr::make_ready_future<bool>(true);
}
// Allocates two new leaf nodes, distributes this node's entries over them
// with split_into, and resolves to (left, right, pivot).
ExtMapLeafNode::split_children_ret
ExtMapLeafNode::make_split_children(ext_context_t ec)
{
  logger().debug("{}: {}", "ExtMapLeafNode", __func__);
  return extmap_alloc_2extents<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this] (auto &&fresh) {
      auto lhs = fresh.first;
      auto rhs = fresh.second;
      auto pivot = split_into(*lhs, *rhs);
      return split_children_ret(
        split_children_ertr::ready_future_marker{},
        std::make_tuple(lhs, rhs, pivot));
    });
}
// Allocates one new leaf node and fills it with the merged contents of this
// node and `right`.
ExtMapLeafNode::full_merge_ret
ExtMapLeafNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right)
{
  logger().debug("{}: {}", "ExtMapLeafNode", __func__);
  return extmap_alloc_extent<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this, right] (auto &&merged) {
      auto rhs = right->cast<ExtMapLeafNode>();
      merged->merge_from(*this, *rhs);
      return full_merge_ret(
        full_merge_ertr::ready_future_marker{},
        std::move(merged));
    });
}
// Allocates two new leaf nodes and redistributes the entries of this node
// and `_right` over them; resolves to (left, right, pivot).
ExtMapLeafNode::make_balanced_ret
ExtMapLeafNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left)
{
  logger().debug("{}: {}", "ExtMapLeafNode", __func__);
  ceph_assert(_right->get_type() == type);
  return extmap_alloc_2extents<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
    .safe_then([this, _right, prefer_left] (auto &&fresh) {
      auto new_left = fresh.first;
      auto new_right = fresh.second;
      auto &rhs = *_right->cast<ExtMapLeafNode>();
      auto pivot = balance_into_new_nodes(
        *this, rhs, prefer_left, *new_left, *new_right);
      return make_balanced_ret(
        make_balanced_ertr::ready_future_marker{},
        std::make_tuple(new_left, new_right, pivot));
    });
}
// Returns the iterator pair bound() yields for the key range
// [addr, addr + len).
std::pair<ExtMapLeafNode::internal_iterator_t, ExtMapLeafNode::internal_iterator_t>
ExtMapLeafNode::get_leaf_entries(objaddr_t addr, extent_len_t len)
{
  auto range = bound(addr, addr + len);
  return range;
}
// Reads the single EXTMAP_BLOCK_SIZE extent at laddr as node type NodeT and
// returns it as a generic ExtMapNodeRef.
template <typename NodeT>
static TransactionManager::read_extent_ertr::future<ExtMapNodeRef>
extmap_load_node_as(ext_context_t ec, laddr_t laddr)
{
  return ec.tm.read_extents<NodeT>(ec.t, laddr, EXTMAP_BLOCK_SIZE).safe_then(
    [](auto&& extents) {
      // ceph_assert (unlike assert) is also checked in release builds, so
      // front() is never called on an unexpected, possibly empty, result.
      ceph_assert(extents.size() == 1);
      [[maybe_unused]] auto [addr, node] = extents.front();
      return TransactionManager::read_extent_ertr::make_ready_future<ExtMapNodeRef>(
        std::move(node));
    });
}

// Loads the extent map node stored at laddr.
// depth selects the node type: depth > 1 reads an inner node, depth == 1 a
// leaf node. depth must be greater than 0.
TransactionManager::read_extent_ertr::future<ExtMapNodeRef>
extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth)
{
  ceph_assert(depth > 0);
  if (depth > 1) {
    return extmap_load_node_as<ExtMapInnerNode>(ec, laddr);
  } else {
    return extmap_load_node_as<ExtMapLeafNode>(ec, laddr);
  }
}
}

View File

@ -1,281 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include "include/buffer.h"
#include "crimson/common/fixed_kv_node_layout.h"
#include "crimson/common/errorator.h"
#include "crimson/os/seastore/extentmap_manager.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
namespace crimson::os::seastore::extentmap_manager {
/// Layout-side counterpart of extmap_node_meta_t (used as the meta type of
/// FixedKVNodeLayout below); stores the node depth as a depth_le_t.
struct extmap_node_meta_le_t {
  depth_le_t depth = init_depth_le(0);

  extmap_node_meta_le_t() = default;
  extmap_node_meta_le_t(const extmap_node_meta_le_t &) = default;

  /// Builds the layout-side value from an extmap_node_meta_t.
  explicit extmap_node_meta_le_t(const extmap_node_meta_t &val)
    : depth(init_depth_le(val.depth)) {}

  /// Converts back to extmap_node_meta_t.
  operator extmap_node_meta_t() const {
    extmap_node_meta_t decoded{ depth };
    return decoded;
  }
};
/**
* ExtMapInnerNode
*
* Abstracts operations on and layout of internal nodes for the
* Extentmap Tree.
*
* Layout (4k):
* num_entries: uint32_t 4b
* meta : depth 4b
* (padding) : 8b
* keys : objaddr_t[340] (340*4)b
* values : laddr_t[340] (340*8)b
* = 4096
*
* INNER_NODE_CAPACITY is the number of (key, value) slots that fit in one
* EXTMAP_BLOCK_SIZE block after the uint32_t entry count and the node meta.
* NOTE(review): the computation uses sizeof() of the in-memory types
* (extmap_node_meta_t, objaddr_t, laddr_t) rather than their *_le_t
* counterparts -- confirm the sizes match.
*/
constexpr size_t INNER_NODE_CAPACITY =
(EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t))
/ (sizeof (objaddr_t) + sizeof(laddr_t));
/// Internal node of the extent-map tree: an ExtMapNode whose buffer is viewed
/// through a FixedKVNodeLayout mapping objaddr_t keys to laddr_t values.
struct ExtMapInnerNode
  : ExtMapNode,
    common::FixedKVNodeLayout<
      INNER_NODE_CAPACITY,
      extmap_node_meta_t, extmap_node_meta_le_t,
      objaddr_t, ceph_le32,
      laddr_t, laddr_le_t> {
  using internal_iterator_t = const_iterator;

  /// Forwards all arguments to ExtMapNode and points the layout at this
  /// extent's buffer.
  template <typename... T>
  ExtMapInnerNode(T&&... t) :
    ExtMapNode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}

  static constexpr extent_types_t type = extent_types_t::EXTMAP_INNER;

  extmap_node_meta_t get_node_meta() const final {
    return get_meta();
  }

  /// Copies this node for mutation; the delta buffer must be empty.
  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new ExtMapInnerNode(*this));
  }

  /// Delta buffer serialized by get_delta().
  delta_buffer_t delta_buffer;

  /// Returns the delta buffer only while a mutation is pending.
  delta_buffer_t *maybe_get_delta_buffer() {
    if (is_mutation_pending()) {
      return &delta_buffer;
    }
    return nullptr;
  }

  find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final;
  insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
  rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
  split_children_ret make_split_children(ext_context_t ec) final;
  full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final;
  make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final;
  std::ostream &print_detail_l(std::ostream &out) const final;

  extent_types_t get_type() const final {
    return type;
  }

  /// Copies the (non-empty) delta buffer out into a bufferlist.
  ceph::bufferlist get_delta() final {
    assert(!delta_buffer.empty());
    ceph::buffer::ptr encoded(delta_buffer.get_bytes());
    delta_buffer.copy_out(encoded.c_str(), encoded.length());
    ceph::bufferlist out;
    out.push_back(encoded);
    return out;
  }

  /// Reads a delta buffer from _bl and replays it onto this node.
  void apply_delta(const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist copy = _bl;
    copy.rebuild();
    delta_buffer_t decoded;
    decoded.copy_in(copy.front().c_str(), copy.front().length());
    decoded.replay(*this);
  }

  bool at_max_capacity() const final {
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const {
    return get_size() == get_capacity() / 2;
  }

  unsigned get_node_size() const {
    return get_size();
  }

  /**
   * Returns {first, last}: first is the first entry whose next key (or max)
   * is greater than l; last is the first entry at or after first whose key
   * is >= r (end() when there is none).
   */
  std::pair<internal_iterator_t, internal_iterator_t> bound(
    objaddr_t l, objaddr_t r) {
    auto first = begin();
    while (first != end() && !(first->get_next_key_or_max() > l)) {
      ++first;
    }
    auto last = first;
    while (last != end() && !(last->get_key() >= r)) {
      ++last;
    }
    return {first, last};
  }

  using split_entry_ertr = TransactionManager::read_extent_ertr;
  using split_entry_ret = split_entry_ertr::future<ExtMapNodeRef>;
  split_entry_ret split_entry(ext_context_t ec, objaddr_t lo,
    internal_iterator_t, ExtMapNodeRef entry);

  using merge_entry_ertr = TransactionManager::read_extent_ertr;
  using merge_entry_ret = merge_entry_ertr::future<ExtMapNodeRef>;
  merge_entry_ret merge_entry(ext_context_t ec, objaddr_t lo,
    internal_iterator_t iter, ExtMapNodeRef entry);

  internal_iterator_t get_containing_child(objaddr_t lo);
};
/**
* ExtMapLeafNode
*
* Abstracts operations on and layout of leaf nodes for the
* ExtentMap Tree.
*
* Layout (4k):
* num_entries: uint32_t 4b
* meta : depth 4b
* (padding) : 8b
* keys : objaddr_t[204] (204*4)b
* values : lext_map_val_t[204] (204*16)b
* = 4096
*
* LEAF_NODE_CAPACITY is the number of (key, value) slots that fit in one
* EXTMAP_BLOCK_SIZE block after the uint32_t entry count and the node meta.
* NOTE(review): the computation uses sizeof(lext_map_val_t), the in-memory
* value type, not sizeof(lext_map_val_le_t) -- confirm both are the 16 bytes
* assumed by the layout above.
*/
constexpr size_t LEAF_NODE_CAPACITY =
(EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t))
/ (sizeof(objaddr_t) + sizeof(lext_map_val_t));
/// Layout-side counterpart of lext_map_val_t (the value type of
/// ExtMapLeafNode's FixedKVNodeLayout): a logical address plus a length.
struct lext_map_val_le_t {
  laddr_le_t laddr;
  extent_len_le_t length = init_extent_len_le(0);

  lext_map_val_le_t() = default;
  lext_map_val_le_t(const lext_map_val_le_t &) = default;

  /// Builds the layout-side value from a lext_map_val_t.
  explicit lext_map_val_le_t(const lext_map_val_t &val)
    : laddr(laddr_le_t(val.laddr)),
      length(init_extent_len_le(val.length)) {}

  /// Converts back to lext_map_val_t.
  operator lext_map_val_t() const {
    lext_map_val_t decoded{laddr, length};
    return decoded;
  }
};
/// Leaf node of the extent-map tree: an ExtMapNode whose buffer is viewed
/// through a FixedKVNodeLayout mapping objaddr_t keys to lext_map_val_t
/// values.
struct ExtMapLeafNode
  : ExtMapNode,
    common::FixedKVNodeLayout<
      LEAF_NODE_CAPACITY,
      extmap_node_meta_t, extmap_node_meta_le_t,
      objaddr_t, ceph_le32,
      lext_map_val_t, lext_map_val_le_t> {
  using internal_iterator_t = const_iterator;

  /// Forwards all arguments to ExtMapNode and points the layout at this
  /// extent's buffer.
  template <typename... T>
  ExtMapLeafNode(T&&... t) :
    ExtMapNode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}

  static constexpr extent_types_t type = extent_types_t::EXTMAP_LEAF;

  extmap_node_meta_t get_node_meta() const final {
    return get_meta();
  }

  /// Copies this node for mutation; the delta buffer must be empty.
  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new ExtMapLeafNode(*this));
  }

  /// Delta buffer serialized by get_delta().
  delta_buffer_t delta_buffer;

  /// Returns the delta buffer only while a mutation is pending.
  delta_buffer_t *maybe_get_delta_buffer() {
    if (is_mutation_pending()) {
      return &delta_buffer;
    }
    return nullptr;
  }

  find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final;
  insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
  rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
  split_children_ret make_split_children(ext_context_t ec) final;
  full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final;
  make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final;

  extent_types_t get_type() const final {
    return type;
  }

  /// Copies the (non-empty) delta buffer out into a bufferlist.
  ceph::bufferlist get_delta() final {
    assert(!delta_buffer.empty());
    ceph::buffer::ptr encoded(delta_buffer.get_bytes());
    delta_buffer.copy_out(encoded.c_str(), encoded.length());
    ceph::bufferlist out;
    out.push_back(encoded);
    return out;
  }

  /// Reads a delta buffer from _bl and replays it onto this node.
  void apply_delta(const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist copy = _bl;
    copy.rebuild();
    delta_buffer_t decoded;
    decoded.copy_in(copy.front().c_str(), copy.front().length());
    decoded.replay(*this);
  }

  std::ostream &print_detail_l(std::ostream &out) const final;

  bool at_max_capacity() const final {
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const final {
    return get_size() == get_capacity() / 2;
  }

  unsigned get_node_size() const {
    return get_size();
  }

  /**
   * Returns {first, last}: first is the first entry that starts at or after
   * l or whose [key, key + length) range extends past l; last is the first
   * entry at or after first whose key is >= r (end() when there is none).
   */
  std::pair<internal_iterator_t, internal_iterator_t> bound(
    objaddr_t l, objaddr_t r) {
    auto first = begin();
    while (first != end() &&
           !(first->get_key() >= l ||
             (first->get_key() + first->get_val().length) > l)) {
      ++first;
    }
    auto last = first;
    while (last != end() && !(last->get_key() >= r)) {
      ++last;
    }
    return {first, last};
  }

  std::pair<internal_iterator_t, internal_iterator_t>
  get_leaf_entries(objaddr_t lo, extent_len_t len);
};
using ExtentMapLeafNodeRef = TCachedExtentRef<ExtMapLeafNode>;
}

View File

@ -59,6 +59,18 @@ public:
Transaction &t,
laddr_list_t &&extent_lisk) = 0;
/**
* find_hole
*
* Finds an unmapped laddr extent of the requested length, searching
* from hint.
*
* @param t    transaction to perform the lookup in
* @param hint laddr at which to begin the search
* @param len  (unnamed third parameter) length of the hole to find
* @return pair of laddr and length; the BtreeLBAManager implementation
*         returns the address found paired with the requested length
*/
using find_hole_ertr = base_ertr;
using find_hole_ret = find_hole_ertr::future<
std::pair<laddr_t, extent_len_t>
>;
virtual find_hole_ret find_hole(
Transaction &t,
laddr_t hint,
extent_len_t) = 0;
/**
* Allocates a new mapping referenced by LBARef
*

View File

@ -102,6 +102,26 @@ BtreeLBAManager::get_mappings(
});
}
/**
 * Asks the root node for a hole of len bytes in [hint, L_ADDR_MAX) and
 * returns the address found paired with len.
 */
BtreeLBAManager::find_hole_ret
BtreeLBAManager::find_hole(
  Transaction &t,
  laddr_t hint,
  extent_len_t len)
{
  // Step 1: delegate the search to the root of the LBA tree.
  auto search_from_root = [this, hint, len, &t](auto root) {
    return root->find_hole(get_context(t), hint, L_ADDR_MAX, len);
  };

  // Step 2: pair the address found with the requested length.
  auto pair_with_len = [len](auto hole_addr) {
    return seastar::make_ready_future<std::pair<laddr_t, extent_len_t>>(
      hole_addr, len);
  };

  return get_root(t
  ).safe_then(std::move(search_from_root)
  ).safe_then(std::move(pair_with_len));
}
BtreeLBAManager::alloc_extent_ret
BtreeLBAManager::alloc_extent(
Transaction &t,

View File

@ -58,6 +58,11 @@ public:
Transaction &t,
laddr_list_t &&list) final;
/// Finds an unmapped laddr range of the given length starting the search at
/// hint; see BtreeLBAManager::find_hole for the implementation.
find_hole_ret find_hole(
Transaction &t,
laddr_t hint,
extent_len_t) final;
alloc_extent_ret alloc_extent(
Transaction &t,
laddr_t hint,

View File

@ -263,6 +263,7 @@ public:
auto ret = std::unique_ptr<BtreeLBAPin>(new BtreeLBAPin);
ret->pin.set_range(pin.range);
ret->paddr = paddr;
ret->parent = parent;
return ret;
}

View File

@ -0,0 +1,536 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include <utility>
#include <functional>
#include "crimson/common/log.h"
#include "crimson/os/seastore/object_data_handler.h"
namespace {
  /// File-local accessor for the logger of the ceph_subsys_filestore
  /// subsystem.
  seastar::logger& logger() {
    auto &filestore_logger = crimson::get_logger(ceph_subsys_filestore);
    return filestore_logger;
  }
}
namespace crimson::os::seastore {
/**
* MAX_OBJECT_SIZE
*
* For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE
* for any object. In the future, once we have the ability to remap logical
* mappings (necessary for clone), we'll add the ability to grow and shrink
* these regions and remove this assumption.
*
* 16<<20 bytes == 16 MiB.
*/
static constexpr extent_len_t MAX_OBJECT_SIZE = 16<<20;
// Asserts that x is a multiple of the transaction manager's block size.
// Expands to an expression using `ctx`, so a context_t named `ctx` must be
// in scope (or captured) wherever the macro is used.
#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
// Shorthand for the handler's context type.
using context_t = ObjectDataHandler::context_t;
// Errorator used by the file-local helpers below; same as write_ertr.
using get_ertr = ObjectDataHandler::write_ertr;
/// Resolves pin to its ObjectDataBlock through the transaction manager.
/// Errors belonging to get_ertr are passed on to the caller; any other
/// error trips an assertion.
auto read_pin(
  context_t ctx,
  LBAPinRef pin) {
  auto pending = ctx.tm.pin_to_extent<ObjectDataBlock>(ctx.t, std::move(pin));
  return std::move(pending).handle_error(
    get_ertr::pass_further{},
    crimson::ct_error::assert_all{ "read_pin: invalid error" }
  );
}
/**
 * extent_to_write_t
 *
 * Encapsulates extents to be written out using do_insertions.
 * Indicates a zero extent or a data extent based on whether
 * to_write is populated.
 */
struct extent_to_write_t {
  /// Logical address at which the extent starts.
  laddr_t addr = L_ADDR_NULL;
  /// Length of the extent in bytes; initialized so that a default
  /// constructed object never carries an indeterminate length.
  extent_len_t len = 0;
  /// Data to write; empty for a zero (reserved) extent.
  std::optional<bufferlist> to_write;

  extent_to_write_t() = default;
  extent_to_write_t(const extent_to_write_t &) = default;
  extent_to_write_t(extent_to_write_t &&) = default;

  /// Data extent: len is taken from the buffer.  Members are initialized in
  /// declaration order (addr, len, to_write), so the length is read before
  /// the by-value parameter is moved into the member.
  extent_to_write_t(laddr_t addr, bufferlist to_write)
    : addr(addr), len(to_write.length()), to_write(std::move(to_write)) {}

  /// Zero extent of len bytes at addr.
  extent_to_write_t(laddr_t addr, extent_len_t len)
    : addr(addr), len(len) {}
};
using extent_to_write_list_t = std::list<extent_to_write_t>;
/// Drops one reference on the mapping behind every pin in pins, one pin at
/// a time.  write_ertr errors are passed on; anything else asserts.
ObjectDataHandler::write_ret do_removals(
  context_t ctx,
  lba_pin_list_t &pins)
{
  auto drop_ref = [ctx](auto &pin) {
    return ctx.tm.dec_ref(
      ctx.t,
      pin->get_laddr()
    ).safe_then(
      [](auto){},  // the value produced by dec_ref is not needed
      ObjectDataHandler::write_ertr::pass_further{},
      crimson::ct_error::assert_all{
        "object_data_handler::do_removals invalid error"
      }
    );
  };
  return crimson::do_for_each(pins.begin(), pins.end(), std::move(drop_ref));
}
/// Materializes every entry of to_write in order: entries without a buffer
/// become reserved (zero) regions, entries with a buffer become freshly
/// allocated ObjectDataBlocks filled with that buffer.
ObjectDataHandler::write_ret do_insertions(
  context_t ctx,
  extent_to_write_list_t &to_write)
{
  auto insert_region = [ctx](auto &region) {
    if (!region.to_write) {
      // Zero extent: only reserve the range and check what we got back.
      return ctx.tm.reserve_region(
        ctx.t,
        region.addr,
        region.len
      ).safe_then([&region](auto pin) {
        ceph_assert(pin->get_length() == region.len);
        ceph_assert(pin->get_laddr() == region.addr);
        return ObjectDataHandler::write_ertr::now();
      });
    }

    // Data extent: must be block aligned and match its buffer's length.
    assert_aligned(region.addr);
    assert_aligned(region.len);
    ceph_assert(region.len == region.to_write->length());
    return ctx.tm.alloc_extent<ObjectDataBlock>(
      ctx.t,
      region.addr,
      region.len
    ).safe_then([&region](auto extent) {
      const bool addr_mismatch = (extent->get_laddr() != region.addr);
      if (addr_mismatch) {
        // Log before the assertion below fires so the addresses are visible.
        logger().debug(
          "object_data_handler::do_insertions alloc got addr {},"
          " should have been {}",
          extent->get_laddr(),
          region.addr);
      }
      ceph_assert(extent->get_laddr() == region.addr);
      ceph_assert(extent->get_length() == region.len);
      // Copy the payload into the new extent's buffer.
      auto src = region.to_write->cbegin();
      src.copy(region.len, extent->get_bptr().c_str());
      return ObjectDataHandler::write_ertr::now();
    });
  };

  return crimson::do_for_each(
    to_write.begin(),
    to_write.end(),
    std::move(insert_region));
}
/**
 * split_pin_left
 *
 * Splits pin at offset and returns a pair of
 *  - an optional zero extent covering the block aligned part of the pin to
 *    the left of offset (only ever set for zero pins), and
 *  - an optional buffer holding the bytes between the start of the rewritten
 *    region and offset, to be prepended to the data being written.
 */
using split_ret_bare = std::pair<
  std::optional<extent_to_write_t>,
  std::optional<bufferptr>>;
using split_ret = get_ertr::future<split_ret_bare>;
split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
{
  const auto pin_offset = pin->get_laddr();
  assert_aligned(pin_offset);
  ceph_assert(offset >= pin_offset);

  // Write starts exactly at the pin: nothing to keep, nothing to prepend.
  if (offset == pin_offset) {
    return get_ertr::make_ready_future<split_ret_bare>(
      std::nullopt,
      std::nullopt);
  }

  // Data pin: read it and hand back everything before offset.
  if (!pin->get_paddr().is_zero()) {
    auto to_prepend = offset - pin_offset;
    return read_pin(ctx, pin->duplicate()
    ).safe_then([to_prepend](auto extent) {
      return get_ertr::make_ready_future<split_ret_bare>(
        std::nullopt,
        bufferptr(extent->get_bptr(), 0, to_prepend));
    });
  }

  // Zero pin: keep the largest block aligned zero extent on the left and
  // return zeroes for the gap between the aligned offset and offset.
  auto aligned_offset = p2align(
    offset, static_cast<uint64_t>(ctx.tm.get_block_size()));
  assert_aligned(aligned_offset);
  ceph_assert(aligned_offset <= offset);
  auto zero_extent_len = aligned_offset - pin_offset;
  assert_aligned(zero_extent_len);
  auto zero_prepend_len = offset - aligned_offset;
  auto left_zero = (zero_extent_len == 0)
    ? std::nullopt
    : std::make_optional(extent_to_write_t(pin_offset, zero_extent_len));
  return get_ertr::make_ready_future<split_ret_bare>(
    std::move(left_zero),
    bufferptr(ceph::buffer::create(zero_prepend_len, 0))
  );
}
/// Mirror image of split_pin_left: splits pin at end and returns an optional
/// zero extent covering the block aligned remainder to the right of end
/// (zero pins only) plus an optional buffer with the bytes between end and
/// the end of the rewritten region, to be appended to the write.
split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end)
{
  const auto pin_begin = pin->get_laddr();
  const auto pin_end = pin->get_laddr() + pin->get_length();
  assert_aligned(pin_end);
  ceph_assert(pin_end >= end);

  // Write ends exactly at the pin's end: nothing to keep or append.
  if (end == pin_end) {
    return get_ertr::make_ready_future<split_ret_bare>(
      std::nullopt,
      std::nullopt);
  }

  // Data pin: read it and hand back everything from end onwards.
  if (!pin->get_paddr().is_zero()) {
    return read_pin(ctx, pin->duplicate()
    ).safe_then([end, pin_begin, pin_end](auto extent) {
      return get_ertr::make_ready_future<split_ret_bare>(
        std::nullopt,
        bufferptr(
          extent->get_bptr(),
          end - pin_begin,
          pin_end - end));
    });
  }

  // Zero pin: pad the write with zeroes up to the next block boundary and
  // keep whatever remains of the pin as a zero extent.
  auto aligned_end = p2roundup(
    end, static_cast<uint64_t>(ctx.tm.get_block_size()));
  assert_aligned(aligned_end);
  ceph_assert(aligned_end >= end);
  auto zero_suffix_len = aligned_end - end;
  auto zero_extent_len = pin_end - aligned_end;
  assert_aligned(zero_extent_len);
  auto right_zero = (zero_extent_len == 0)
    ? std::nullopt
    : std::make_optional(extent_to_write_t(aligned_end, zero_extent_len));
  return get_ertr::make_ready_future<split_ret_bare>(
    std::move(right_zero),
    bufferptr(ceph::buffer::create(zero_suffix_len, 0))
  );
}
/**
 * Runs f on a working copy of the onode's object_data, kept alive by
 * do_with for the duration of the call.  After f completes successfully the
 * copy is written back into the onode's mutable layout if it reports
 * must_update().
 */
template <typename F>
auto with_object_data(
  ObjectDataHandler::context_t ctx,
  F &&f)
{
  return seastar::do_with(
    ctx.onode.get_layout().object_data.get(),
    std::forward<F>(f),
    [ctx](auto &data, auto &func) {
      // Persist the working copy only when it has been changed.
      auto persist_if_changed = [ctx, &data] {
        if (!data.must_update()) {
          return seastar::now();
        }
        ctx.onode.get_mutable_layout(ctx.t).object_data.update(data);
        return seastar::now();
      };
      return std::invoke(func, data
      ).safe_then(std::move(persist_if_changed));
    });
}
/**
 * Makes sure object_data owns a reserved laddr region.
 *
 * size must not exceed MAX_OBJECT_SIZE (asserted).  An existing reservation
 * must already be MAX_OBJECT_SIZE long; otherwise a region of
 * MAX_OBJECT_SIZE is reserved and recorded in object_data.
 */
ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
  context_t ctx,
  object_data_t &object_data,
  extent_len_t size)
{
  ceph_assert(size <= MAX_OBJECT_SIZE);

  if (object_data.is_null()) {
    // No reservation yet: reserve the fixed-size region and remember it.
    return ctx.tm.reserve_region(
      ctx.t,
      0 /* TODO -- pass hint based on object hash */,
      MAX_OBJECT_SIZE
    ).safe_then([&object_data](auto pin) {
      ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
      object_data.update_reserved(
        pin->get_laddr(),
        pin->get_length());
      return write_ertr::now();
    });
  }

  // Already reserved: reservations are always MAX_OBJECT_SIZE for now.
  ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
  return write_ertr::now();
}
/**
 * Discards all object data at or beyond size (relative to the start of the
 * reserved region) and replaces it with a single zero extent reaching to
 * the end of the reservation.
 *
 * Preconditions (asserted): object_data has a reservation, size is block
 * aligned and size <= reserved length.  When size == 0 the reservation
 * recorded in object_data is cleared afterwards.
 */
ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
  context_t ctx, object_data_t &object_data, extent_len_t size)
{
  ceph_assert(!object_data.is_null());
  assert_aligned(size);
  ceph_assert(size <= object_data.get_reserved_data_len());
  return seastar::do_with(
    lba_pin_list_t(),
    extent_to_write_list_t(),
    [this, ctx, size, &object_data](auto &pins, auto &to_write) {
      // Collect every pin overlapping [base + size, base + reserved_len).
      return ctx.tm.get_pins(
        ctx.t,
        object_data.get_reserved_data_base() + size,
        object_data.get_reserved_data_len() - size
      ).safe_then([this, ctx, size, &pins, &object_data, &to_write](auto _pins) {
        _pins.swap(pins);
        ceph_assert(pins.size());
        auto &pin = *pins.front();
        const auto trim_addr = object_data.get_reserved_data_base() + size;
        ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base());
        ceph_assert(pin.get_laddr() <= trim_addr);
        auto pin_offset = pin.get_laddr() -
          object_data.get_reserved_data_base();
        if ((pin.get_laddr() == trim_addr) || pin.get_paddr().is_zero()) {
          /* First pin either starts exactly at the trim boundary or is a
           * zero pin.  Either way nothing in it has to be preserved: all
           * pins are removed and a single zero extent from the start of the
           * first pin to the end of the reservation replaces them.
           * Handling the boundary case here avoids reading the extent and
           * queueing a zero-length data extent for do_insertions. */
          to_write.emplace_back(
            pin.get_laddr(),
            object_data.get_reserved_data_len() - pin_offset);
          return clear_ertr::now();
        } else {
          /* First pin holds data and straddles the boundary: read it,
           * rewrite the part before size and zero everything after. */
          return read_pin(
            ctx,
            pin.duplicate()
          ).safe_then([size, pin_offset, &pin, &object_data, &to_write](
                        auto extent) {
            bufferlist bl;
            bl.append(
              bufferptr(
                extent->get_bptr(),
                0,
                size - pin_offset
              ));
            to_write.emplace_back(
              pin.get_laddr(),
              bl);
            to_write.emplace_back(
              object_data.get_reserved_data_base() + size,
              object_data.get_reserved_data_len() - size);
            return clear_ertr::now();
          });
        }
      }).safe_then([ctx, &pins] {
        // Drop the old mappings first ...
        return do_removals(ctx, pins);
      }).safe_then([ctx, &to_write] {
        // ... then install the replacement extents.
        return do_insertions(ctx, to_write);
      }).safe_then([size, &object_data] {
        if (size == 0) {
          object_data.clear();
        }
        return ObjectDataHandler::clear_ertr::now();
      });
    });
}
/**
 * get_buffers
 *
 * Wraps bl into a list of extent_to_write_t's; currently this is always a
 * single data extent starting at offset.
 *
 * TODO: probably add some kind of upper limit on extent size.
 */
extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
{
  extent_to_write_list_t extents;
  extents.emplace_back(offset, bl);
  return extents;
}
/**
 * Replaces the contents of [_offset, _offset + bl.length()) with bl.
 *
 * _pins must be the non-empty, ordered set of pins overlapping that range:
 * the first pin must start at or before _offset and the last pin must end
 * at or after the end of the write (both asserted).  The first and last
 * pins are split so that the bytes they hold outside of the written range
 * are preserved; then all pins are removed and the resulting extents are
 * inserted.
 */
ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
  context_t ctx,
  laddr_t _offset,
  bufferlist &&bl,
  lba_pin_list_t &&_pins)
{
  return seastar::do_with(
    _offset,
    std::move(bl),
    std::move(_pins),
    extent_to_write_list_t(),
    [this, ctx](laddr_t &offset, auto &bl, auto &pins, auto &to_write) {
      ceph_assert(pins.size() >= 1);
      auto pin_begin = pins.front()->get_laddr();
      ceph_assert(pin_begin <= offset);
      auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
      // The pins must cover the whole write.  (This used to compare pin_end
      // against the boolean `offset > bl.length()`, which checked nothing.)
      ceph_assert(pin_end >= (offset + bl.length()));
      return split_pin_left(
        ctx,
        pins.front(),
        offset
      ).safe_then([this, ctx, pin_begin, &offset, &bl, &pins, &to_write](
                    auto p) {
        auto &[left_extent, headptr] = p;
        if (left_extent) {
          ceph_assert(left_extent->addr == pin_begin);
          to_write.push_front(std::move(*left_extent));
        }
        if (headptr) {
          // Prepend the preserved head and move the write start back so
          // that it is block aligned.
          bufferlist newbl;
          newbl.append(*headptr);
          newbl.append(bl);
          bl.swap(newbl);
          offset -= headptr->length();
          assert_aligned(offset);
        }
        return split_pin_right(
          ctx,
          pins.back(),
          offset + bl.length());
      }).safe_then([this, ctx, pin_end, &offset, &bl, &pins, &to_write](
                     auto p) {
        auto &[right_extent, tailptr] = p;
        if (tailptr) {
          // Append the preserved tail; the buffer is now block aligned.
          bl.append(*tailptr);
          assert_aligned(bl.length());
        }
        to_write.splice(to_write.end(), get_buffers(offset, bl));
        if (right_extent) {
          ceph_assert((right_extent->addr + right_extent->len) == pin_end);
          to_write.push_back(std::move(*right_extent));
        }
        return write_ertr::now();
      }).safe_then([this, ctx, &pins] {
        return do_removals(ctx, pins);
      }).safe_then([this, ctx, &to_write] {
        return do_insertions(ctx, to_write);
      });
    });
}
/**
 * Writes bl at object offset `offset`.
 *
 * Ensures the object has a data reservation, looks up the pins overlapping
 * the written range inside that reservation and hands a copy of bl plus the
 * pins to overwrite().
 */
ObjectDataHandler::write_ret ObjectDataHandler::write(
  context_t ctx,
  objaddr_t offset,
  const bufferlist &bl)
{
  return with_object_data(
    ctx,
    [this, ctx, offset, &bl](auto &object_data) {
      // Size the reservation check by the block-rounded end of the write.
      const auto required_len =
        p2roundup(offset + bl.length(), ctx.tm.get_block_size());
      return prepare_data_reservation(
        ctx,
        object_data,
        required_len
      ).safe_then([this, ctx, offset, &object_data, &bl] {
        // Translate the object offset into the reserved laddr region.
        auto logical_offset = object_data.get_reserved_data_base() + offset;
        return ctx.tm.get_pins(
          ctx.t,
          logical_offset,
          bl.length()
        ).safe_then([this, ctx, logical_offset, &bl](auto pins) {
          return overwrite(
            ctx, logical_offset, bufferlist(bl), std::move(pins));
        });
      });
    });
}
/**
* Reads len bytes starting at object offset obj_offset.
*
* Preconditions (asserted): the object has a data reservation,
* obj_offset + len does not exceed the reserved length, and len > 0.
*
* The pins overlapping the requested range are visited in order; for each
* pin the part that intersects the range is appended to the result, as
* zeroes for zero pins and as extent contents otherwise.
*/
ObjectDataHandler::read_ret ObjectDataHandler::read(
context_t ctx,
objaddr_t obj_offset,
extent_len_t len)
{
// `ret` accumulates the result and is kept alive by do_with until the
// final continuation moves it out.
return seastar::do_with(
bufferlist(),
[this, ctx, obj_offset, len](auto &ret) {
return with_object_data(
ctx,
[this, ctx, obj_offset, len, &ret](const auto &object_data) {
/* Assumption: callers ensure that onode size is <= reserved
* size and that len is adjusted here prior to call */
ceph_assert(!object_data.is_null());
ceph_assert((obj_offset + len) <= object_data.get_reserved_data_len());
ceph_assert(len > 0);
// Translate the object offset into the reserved laddr region.
laddr_t loffset =
object_data.get_reserved_data_base() + obj_offset;
return ctx.tm.get_pins(
ctx.t,
loffset,
len
).safe_then([this, ctx, loffset, len, &ret](auto _pins) {
// offset~len falls within reserved region and len > 0
ceph_assert(_pins.size() >= 1);
ceph_assert((*_pins.begin())->get_laddr() <= loffset);
// `current` is the laddr up to which data has been appended so far; it
// starts at loffset and advances to `end` after each pin.
return seastar::do_with(
std::move(_pins),
loffset,
[this, ctx, loffset, len, &ret](auto &pins, auto &current) {
return crimson::do_for_each(
std::begin(pins),
std::end(pins),
[this, ctx, loffset, len, &current, &ret](auto &pin)
-> read_ertr::future<> {
ceph_assert(current <= (loffset + len));
ceph_assert(
(loffset + len) > pin->get_laddr());
// End of the part of this pin that lies inside the requested range.
laddr_t end = std::min(
pin->get_laddr() + pin->get_length(),
loffset + len);
if (pin->get_paddr().is_zero()) {
// Zero pin: no extent to read, append zeroes.
ceph_assert(end > current); // See LBAManager::get_mappings
ret.append_zero(end - current);
current = end;
return seastar::now();
} else {
// Data pin: resolve it to its extent (the pin is moved from here) and
// append the slice [current, end) of the extent's buffer.
return ctx.tm.pin_to_extent<ObjectDataBlock>(
ctx.t,
std::move(pin)
).safe_then([&ret, &current, end](auto extent) {
ceph_assert(
(extent->get_laddr() + extent->get_length()) >= end);
ceph_assert(end > current);
ret.append(
bufferptr(
extent->get_bptr(),
current - extent->get_laddr(),
end - current));
current = end;
return seastar::now();
}).handle_error(
read_ertr::pass_further{},
crimson::ct_error::assert_all{
"ObjectDataHandler::read hit invalid error"
}
);
}
});
});
});
}).safe_then([&ret] {
// `ret` is owned by the outer do_with, so it has to be moved out
// explicitly.
return std::move(ret);
});
});
}
/**
 * Adjusts the object's data to end at offset.
 *
 * An offset below the reserved length trims the data past it, an offset
 * above it goes through prepare_data_reservation(), and an offset equal to
 * the reserved length is a no-op.
 */
ObjectDataHandler::truncate_ret ObjectDataHandler::truncate(
  context_t ctx,
  objaddr_t offset)
{
  return with_object_data(
    ctx,
    [this, ctx, offset](auto &object_data) {
      const auto reserved_len = object_data.get_reserved_data_len();
      if (offset == reserved_len) {
        return truncate_ertr::now();
      }
      if (offset < reserved_len) {
        return trim_data_reservation(ctx, object_data, offset);
      }
      return prepare_data_reservation(
        ctx,
        object_data,
        offset);
    });
}
/**
 * Removes all object data together with the reservation.
 *
 * An object that has no reservation has nothing to clear; return right away
 * instead of tripping the `!object_data.is_null()` assertion in
 * trim_data_reservation().
 */
ObjectDataHandler::clear_ret ObjectDataHandler::clear(
  context_t ctx)
{
  return with_object_data(
    ctx,
    [this, ctx](auto &object_data) {
      if (object_data.is_null()) {
        return clear_ertr::now();
      }
      return trim_data_reservation(ctx, object_data, 0);
    });
}
}

View File

@ -0,0 +1,109 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include <iostream>
#include <limits>
#include "include/buffer.h"
#include "test/crimson/seastore/test_block.h" // TODO
#include "crimson/os/seastore/onode.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/transaction.h"
namespace crimson::os::seastore {
/// Logical extent type (extent_types_t::OBJECT_DATA_BLOCK) holding object
/// data.  Blocks are always freshly allocated rather than mutated, so the
/// delta interface is never expected to be called.
struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent {
  using Ref = TCachedExtentRef<ObjectDataBlock>;

  static constexpr extent_types_t TYPE = extent_types_t::OBJECT_DATA_BLOCK;

  ObjectDataBlock(ceph::bufferptr &&ptr)
    : LogicalCachedExtent(std::move(ptr)) {}
  ObjectDataBlock(const ObjectDataBlock &other)
    : LogicalCachedExtent(other) {}

  /// Returns a new ObjectDataBlock copy-constructed from this one.
  CachedExtentRef duplicate_for_write() final {
    CachedExtentRef copy(new ObjectDataBlock(*this));
    return copy;
  }

  extent_types_t get_type() const final {
    return TYPE;
  }

  ceph::bufferlist get_delta() final {
    /* Currently, we always allocate fresh ObjectDataBlock's rather than
     * mutating existing ones. */
    ceph_assert(0 == "Should be impossible");
  }

  void apply_delta(const ceph::bufferlist &bl) final {
    // See get_delta()
    ceph_assert(0 == "Should be impossible");
  }
};
using ObjectDataBlockRef = TCachedExtentRef<ObjectDataBlock>;
/**
* ObjectDataHandler
*
* Implements reads and writes of object data on top of a
* TransactionManager.  The handler declares no data members; everything
* an operation needs is passed in through context_t.
*/
class ObjectDataHandler {
public:
using base_ertr = TransactionManager::base_ertr;
/// Per-call context: the transaction manager, the transaction to operate
/// in, and the onode whose data is accessed.  All three are references.
struct context_t {
TransactionManager &tm;
Transaction &t;
Onode &onode;
};
/// Writes bl to [offset, offset + bl.length())
using write_ertr = base_ertr;
using write_ret = write_ertr::future<>;
write_ret write(
context_t ctx,
objaddr_t offset,
const bufferlist &bl);
/// Reads data in [offset, offset + len); the implementation asserts that
/// the object has a data reservation and that len > 0
using read_ertr = base_ertr;
using read_ret = read_ertr::future<bufferlist>;
read_ret read(
context_t ctx,
objaddr_t offset,
extent_len_t len);
/// Clears data past offset
using truncate_ertr = base_ertr;
using truncate_ret = truncate_ertr::future<>;
truncate_ret truncate(
context_t ctx,
objaddr_t offset);
/// Clears data and reservation
using clear_ertr = base_ertr;
using clear_ret = clear_ertr::future<>;
clear_ret clear(context_t ctx);
private:
/// Updates region [offset, offset + bl.length()) to bl; offset is a
/// logical (laddr) address here, not an object offset
write_ret overwrite(
context_t ctx, ///< [in] ctx
laddr_t offset, ///< [in] write offset
bufferlist &&bl, ///< [in] buffer to write
lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
);
/// Ensures object_data reserved region is prepared
write_ret prepare_data_reservation(
context_t ctx,
object_data_t &object_data,
extent_len_t size);
/// Trims data past size (size is relative to the start of the reserved
/// region)
clear_ret trim_data_reservation(
context_t ctx,
object_data_t &object_data,
extent_len_t size);
};
}

View File

@ -603,25 +603,21 @@ omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth)
{
ceph_assert(depth > 0);
if (depth > 1) {
return oc.tm.read_extents<OMapInnerNode>(oc.t, laddr, OMAP_BLOCK_SIZE
return oc.tm.read_extent<OMapInnerNode>(oc.t, laddr, OMAP_BLOCK_SIZE
).handle_error(
omap_load_extent_ertr::pass_further{},
crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" }
).safe_then(
[](auto&& extents) {
assert(extents.size() == 1);
[[maybe_unused]] auto [laddr, e] = extents.front();
[](auto&& e) {
return seastar::make_ready_future<OMapNodeRef>(std::move(e));
});
} else {
return oc.tm.read_extents<OMapLeafNode>(oc.t, laddr, OMAP_BLOCK_SIZE
return oc.tm.read_extent<OMapLeafNode>(oc.t, laddr, OMAP_BLOCK_SIZE
).handle_error(
omap_load_extent_ertr::pass_further{},
crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" }
).safe_then(
[](auto&& extents) {
assert(extents.size() == 1);
[[maybe_unused]] auto [laddr, e] = extents.front();
[](auto&& e) {
return seastar::make_ready_future<OMapNodeRef>(std::move(e));
});
}

View File

@ -16,6 +16,8 @@ namespace crimson::os::seastore {
/// Packed layout of an onode's fixed fields.  The struct is declared
/// __attribute__((packed)), so member order and types define the byte
/// layout -- do not reorder.
struct onode_layout_t {
// Object size, initialized to 0.
ceph_le32 size{0};
// Root of the object's omap.
omap_root_le_t omap_root;
// Object data reservation (read and updated through ObjectDataHandler).
object_data_le_t object_data;
} __attribute__((packed));
class Transaction;

View File

@ -75,10 +75,8 @@ class SeastoreNodeExtentManager final: public NodeExtentManager {
tm_future<NodeExtentRef> read_extent(
Transaction& t, laddr_t addr, extent_len_t len) override {
logger().debug("OTree::Seastore: reading {}B at {:#x} ...", len, addr);
return tm.read_extents<SeastoreNodeExtent>(t, addr, len
).safe_then([addr, len](auto&& extents) {
assert(extents.size() == 1);
[[maybe_unused]] auto [laddr, e] = extents.front();
return tm.read_extent<SeastoreNodeExtent>(t, addr, len
).safe_then([addr, len](auto&& e) {
logger().trace("OTree::Seastore: read {}B at {:#x}",
e->get_length(), e->get_laddr());
assert(e->get_laddr() == addr);

View File

@ -19,6 +19,7 @@
#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
#include "crimson/os/seastore/segment_manager/ephemeral.h"
#include "crimson/os/seastore/onode_manager.h"
#include "crimson/os/seastore/object_data_handler.h"
namespace {
seastar::logger& logger() {
@ -169,7 +170,19 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::read(
size_t len,
uint32_t op_flags)
{
return read_errorator::make_ready_future<ceph::bufferlist>();
return repeat_with_onode<ceph::bufferlist>(
ch,
oid,
[=](auto &t, auto &onode) {
return ObjectDataHandler().read(
ObjectDataHandler::context_t{
*transaction_manager,
t,
onode,
},
offset,
len);
});
}
SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::readv(
@ -213,10 +226,10 @@ seastar::future<struct stat> SeaStore::stat(
struct stat st;
auto &olayout = onode.get_layout();
st.st_size = olayout.size;
st.st_blksize = 4096;
st.st_blksize = transaction_manager->get_block_size();
st.st_blocks = (st.st_size + st.st_blksize - 1) / st.st_blksize;
st.st_nlink = 1;
return seastar::make_ready_future<struct stat>();
return seastar::make_ready_future<struct stat>(st);
}).handle_error(
crimson::ct_error::assert_all{
"Invalid error in SeaStore::stat"
@ -529,7 +542,9 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
uint32_t fadvise_flags = i.get_fadvise_flags();
ceph::bufferlist bl;
i.decode_bl(bl);
return _write(ctx, get_onode(op->oid), off, len, bl, fadvise_flags);
return _write(
ctx, get_onode(op->oid), off, len, std::move(bl),
fadvise_flags);
}
break;
case Transaction::OP_TRUNCATE:
@ -545,7 +560,7 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
i.decode_bl(bl);
std::map<std::string, bufferptr> to_set;
to_set[name] = bufferptr(bl.c_str(), bl.length());
return _setattrs(ctx, get_onode(op->oid), to_set);
return _setattrs(ctx, get_onode(op->oid), std::move(to_set));
}
break;
case Transaction::OP_MKCOLL:
@ -571,14 +586,14 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
{
ceph::bufferlist bl;
i.decode_bl(bl);
return _omap_set_header(ctx, get_onode(op->oid), bl);
return _omap_set_header(ctx, get_onode(op->oid), std::move(bl));
}
break;
case Transaction::OP_OMAP_RMKEYS:
{
omap_keys_t keys;
i.decode_keyset(keys);
return _omap_rmkeys(ctx, get_onode(op->oid), keys);
return _omap_rmkeys(ctx, get_onode(op->oid), std::move(keys));
}
break;
case Transaction::OP_OMAP_RMKEYRANGE:
@ -586,7 +601,9 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
string first, last;
first = i.decode_string();
last = i.decode_string();
return _omap_rmkeyrange(ctx, get_onode(op->oid), first, last);
return _omap_rmkeyrange(
ctx, get_onode(op->oid),
std::move(first), std::move(last));
}
break;
case Transaction::OP_COLL_HINT:
@ -626,13 +643,30 @@ SeaStore::tm_ret SeaStore::_touch(
SeaStore::tm_ret SeaStore::_write(
internal_context_t &ctx,
OnodeRef &onode,
uint64_t offset, size_t len, const ceph::bufferlist& bl,
uint64_t offset, size_t len,
ceph::bufferlist &&_bl,
uint32_t fadvise_flags)
{
logger().debug("{}: {} {} ~ {}",
logger().debug("SeaStore::{}: {} {} ~ {}",
__func__, *onode, offset, len);
assert(len == bl.length());
return tm_ertr::now();
{
auto &object_size = onode->get_mutable_layout(*ctx.transaction).size;
object_size = std::max<uint64_t>(
offset + len,
object_size);
}
return seastar::do_with(
std::move(_bl),
[=, &ctx, &onode](auto &bl) {
return ObjectDataHandler().write(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
*onode,
},
offset,
bl);
});
}
SeaStore::tm_ret SeaStore::_omap_set_values(
@ -685,7 +719,7 @@ SeaStore::tm_ret SeaStore::_omap_set_values(
SeaStore::tm_ret SeaStore::_omap_set_header(
internal_context_t &ctx,
OnodeRef &onode,
const ceph::bufferlist &header)
ceph::bufferlist &&header)
{
logger().debug(
"{}: {} {} bytes",
@ -697,7 +731,7 @@ SeaStore::tm_ret SeaStore::_omap_set_header(
SeaStore::tm_ret SeaStore::_omap_rmkeys(
internal_context_t &ctx,
OnodeRef &onode,
const omap_keys_t& keys)
omap_keys_t &&keys)
{
logger().debug(
"{} {} {} keys",
@ -709,9 +743,11 @@ SeaStore::tm_ret SeaStore::_omap_rmkeys(
return seastar::do_with(
BtreeOMapManager(*transaction_manager),
onode->get_layout().omap_root.get(),
[&ctx, &onode, &keys, this](
std::move(keys),
[&ctx, &onode, this](
auto &omap_manager,
auto &omap_root) {
auto &omap_root,
auto &keys) {
return crimson::do_for_each(
keys.begin(),
keys.end(),
@ -733,8 +769,8 @@ SeaStore::tm_ret SeaStore::_omap_rmkeys(
SeaStore::tm_ret SeaStore::_omap_rmkeyrange(
internal_context_t &ctx,
OnodeRef &onode,
const std::string &first,
const std::string &last)
std::string first,
std::string last)
{
logger().debug(
"{} {} first={} last={}",
@ -748,15 +784,22 @@ SeaStore::tm_ret SeaStore::_truncate(
OnodeRef &onode,
uint64_t size)
{
logger().debug("{} onode={} size={}",
logger().debug("SeaStore::{} onode={} size={}",
__func__, *onode, size);
return tm_ertr::now();
onode->get_mutable_layout(*ctx.transaction).size = size;
return ObjectDataHandler().truncate(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
*onode
},
size);
}
SeaStore::tm_ret SeaStore::_setattrs(
internal_context_t &ctx,
OnodeRef &onode,
std::map<std::string,bufferptr>& aset)
std::map<std::string,bufferptr> &&aset)
{
logger().debug("{} onode={}",
__func__, *onode);

View File

@ -228,7 +228,8 @@ private:
tm_ret _write(
internal_context_t &ctx,
OnodeRef &onode,
uint64_t offset, size_t len, const ceph::bufferlist& bl,
uint64_t offset, size_t len,
ceph::bufferlist &&bl,
uint32_t fadvise_flags);
tm_ret _omap_set_values(
internal_context_t &ctx,
@ -237,23 +238,23 @@ private:
tm_ret _omap_set_header(
internal_context_t &ctx,
OnodeRef &onode,
const ceph::bufferlist &header);
ceph::bufferlist &&header);
tm_ret _omap_rmkeys(
internal_context_t &ctx,
OnodeRef &onode,
const omap_keys_t& aset);
omap_keys_t &&aset);
tm_ret _omap_rmkeyrange(
internal_context_t &ctx,
OnodeRef &onode,
const std::string &first,
const std::string &last);
std::string first,
std::string last);
tm_ret _truncate(
internal_context_t &ctx,
OnodeRef &onode, uint64_t size);
tm_ret _setattrs(
internal_context_t &ctx,
OnodeRef &onode,
std::map<std::string,bufferptr>& aset);
std::map<std::string,bufferptr> &&aset);
tm_ret _create_collection(
internal_context_t &ctx,
const coll_t& cid, int bits);

View File

@ -13,6 +13,8 @@ std::ostream &segment_to_stream(std::ostream &out, const segment_id_t &t)
return out << "BLOCK_REL_SEG";
else if (t == RECORD_REL_SEG_ID)
return out << "RECORD_REL_SEG";
else if (t == ZERO_SEG_ID)
return out << "ZERO_SEG";
else if (t == FAKE_SEG_ID)
return out << "FAKE_SEG";
else
@ -53,10 +55,6 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "LADDR_INTERNAL";
case extent_types_t::LADDR_LEAF:
return out << "LADDR_LEAF";
case extent_types_t::EXTMAP_INNER:
return out << "EXTMAP_INNER";
case extent_types_t::EXTMAP_LEAF:
return out << "EXTMAP_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
case extent_types_t::OMAP_INNER:
@ -65,6 +63,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "OMAP_LEAF";
case extent_types_t::COLL_BLOCK:
return out << "COLL_BLOCK";
case extent_types_t::OBJECT_DATA_BLOCK:
return out << "OBJECT_DATA_BLOCK";
case extent_types_t::TEST_BLOCK:
return out << "TEST_BLOCK";
case extent_types_t::TEST_BLOCK_PHYSICAL:

View File

@ -43,11 +43,16 @@ constexpr segment_id_t RECORD_REL_SEG_ID =
std::numeric_limits<segment_id_t>::max() - 2;
constexpr segment_id_t BLOCK_REL_SEG_ID =
std::numeric_limits<segment_id_t>::max() - 3;
// for tests which generate fake paddrs
constexpr segment_id_t FAKE_SEG_ID =
std::numeric_limits<segment_id_t>::max() - 4;
/* Denotes references to a notional zero-filled segment; mainly used
 * to mark reserved laddr ranges for unallocated object data.
 */
constexpr segment_id_t ZERO_SEG_ID =
std::numeric_limits<segment_id_t>::max() - 5;
std::ostream &segment_to_stream(std::ostream &, const segment_id_t &t);
// Offset within a segment on disk, see SegmentManager
@ -103,6 +108,27 @@ struct paddr_t {
return segment == BLOCK_REL_SEG_ID;
}
/// Denotes special zero segment addr: true when this address's segment
/// is ZERO_SEG_ID.
bool is_zero() const {
return segment == ZERO_SEG_ID;
}
/// Denotes special null segment addr: true when this address's segment
/// is NULL_SEG_ID.
bool is_null() const {
return segment == NULL_SEG_ID;
}
/**
 * is_real
 *
 * True when addr reflects a physical location, absolute or relative;
 * that is, when it is neither the zero segment nor the null segment.
 * FAKE segments also count as real so as to reflect the way in which
 * unit tests use them.
 */
bool is_real() const {
  return !(is_zero() || is_null());
}
paddr_t add_offset(segment_off_t o) const {
return paddr_t{segment, offset + o};
}
@ -175,6 +201,9 @@ constexpr paddr_t make_block_relative_paddr(segment_off_t off) {
constexpr paddr_t make_fake_paddr(segment_off_t off) {
return paddr_t{FAKE_SEG_ID, off};
}
/// Builds the paddr for the zero segment: segment ZERO_SEG_ID, offset 0.
constexpr paddr_t zero_paddr() {
return paddr_t{ZERO_SEG_ID, 0};
}
struct __attribute((packed)) paddr_le_t {
ceph_le32 segment = ceph_le32(NULL_SEG_ID);
@ -195,7 +224,8 @@ struct __attribute((packed)) paddr_le_t {
std::ostream &operator<<(std::ostream &out, const paddr_t &rhs);
using objaddr_t = uint32_t;
constexpr objaddr_t OBJ_ADDR_MIN = std::numeric_limits<objaddr_t>::min();
constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits<objaddr_t>::max();
constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX - 1;
/* Monotonically increasing identifier for the location of a
* journal_record.
@ -282,12 +312,11 @@ enum class extent_types_t : uint8_t {
ROOT = 0,
LADDR_INTERNAL = 1,
LADDR_LEAF = 2,
EXTMAP_INNER = 4,
EXTMAP_LEAF = 5,
OMAP_INNER = 6,
OMAP_LEAF = 7,
ONODE_BLOCK_STAGED = 8,
COLL_BLOCK = 9,
OMAP_INNER = 4,
OMAP_LEAF = 5,
ONODE_BLOCK_STAGED = 6,
COLL_BLOCK = 7,
OBJECT_DATA_BLOCK = 8,
// Test Block Types
TEST_BLOCK = 0xF0,
@ -367,6 +396,71 @@ struct record_t {
std::vector<delta_info_t> deltas;
};
class object_data_t {
laddr_t reserved_data_base = L_ADDR_NULL;
extent_len_t reserved_data_len = 0;
bool dirty = false;
public:
object_data_t(
laddr_t reserved_data_base,
extent_len_t reserved_data_len)
: reserved_data_base(reserved_data_base),
reserved_data_len(reserved_data_len) {}
laddr_t get_reserved_data_base() const {
return reserved_data_base;
}
extent_len_t get_reserved_data_len() const {
return reserved_data_len;
}
bool is_null() const {
return reserved_data_base == L_ADDR_NULL;
}
bool must_update() const {
return dirty;
}
void update_reserved(
laddr_t base,
extent_len_t len) {
dirty = true;
reserved_data_base = base;
reserved_data_len = len;
}
void update_len(
extent_len_t len) {
dirty = true;
reserved_data_len = len;
}
void clear() {
dirty = true;
reserved_data_base = L_ADDR_NULL;
reserved_data_len = 0;
}
};
/**
 * object_data_le_t
 *
 * Packed counterpart of object_data_t holding the reserved base and
 * length in their *_le_t representations.
 */
struct __attribute__((packed)) object_data_le_t {
  laddr_le_t reserved_data_base = laddr_le_t(L_ADDR_NULL);
  extent_len_le_t reserved_data_len = init_extent_len_le(0);

  /// Copy base and length out of nroot into the packed fields.
  void update(const object_data_t &nroot) {
    reserved_data_len = init_extent_len_le(nroot.get_reserved_data_len());
    reserved_data_base = nroot.get_reserved_data_base();
  }

  /// Build an object_data_t from the packed fields.
  object_data_t get() const {
    const laddr_t base = reserved_data_base;
    const extent_len_t len = reserved_data_len;
    return object_data_t(base, len);
  }
};
struct omap_root_t {
laddr_t addr = L_ADDR_NULL;
depth_t depth = 0;

View File

@ -156,7 +156,7 @@ TransactionManager::ref_ret TransactionManager::dec_ref(
{
return lba_manager->decref_extent(t, offset
).safe_then([this, offset, &t](auto result) -> ref_ret {
if (result.refcount == 0) {
if (result.refcount == 0 && !result.addr.is_zero()) {
logger().debug(
"TransactionManager::dec_ref: offset {} refcount 0",
offset);

View File

@ -96,64 +96,91 @@ public:
}
/**
* Read extents corresponding to specified lba range
* get_pins
*
* Get logical pins overlapping offset~length
*/
using read_extent_ertr = LBAManager::get_mapping_ertr::extend_ertr<
SegmentManager::read_ertr>;
template <typename T>
using read_extent_ret = read_extent_ertr::future<lextent_list_t<T>>;
template <typename T>
read_extent_ret<T> read_extents(
using get_pins_ertr = LBAManager::get_mapping_ertr;
using get_pins_ret = get_pins_ertr::future<lba_pin_list_t>;
get_pins_ret get_pins(
Transaction &t,
laddr_t offset,
extent_len_t length)
{
std::unique_ptr<lextent_list_t<T>> ret =
std::make_unique<lextent_list_t<T>>();
auto &ret_ref = *ret;
std::unique_ptr<lba_pin_list_t> pin_list =
std::make_unique<lba_pin_list_t>();
auto &pin_list_ref = *pin_list;
extent_len_t length) {
return lba_manager->get_mapping(
t, offset, length
).safe_then([this, &t, &pin_list_ref, &ret_ref](auto pins) {
t, offset, length);
}
/**
* pin_to_extent
*
* Get extent mapped at pin.
*/
using pin_to_extent_ertr = get_pins_ertr::extend_ertr<
SegmentManager::read_ertr>;
template <typename T>
using pin_to_extent_ret = pin_to_extent_ertr::future<
TCachedExtentRef<T>>;
template <typename T>
pin_to_extent_ret<T> pin_to_extent(
Transaction &t,
LBAPinRef pin) {
using ret = pin_to_extent_ret<T>;
crimson::get_logger(ceph_subsys_filestore).debug(
"pin_to_extent: getting extent {}",
*pin);
return cache->get_extent<T>(
t,
pin->get_paddr(),
pin->get_length()
).safe_then([this, pin=std::move(pin)](auto ref) mutable -> ret {
if (!ref->has_pin()) {
if (pin->has_been_invalidated() || ref->has_been_invalidated()) {
return crimson::ct_error::eagain::make();
} else {
ref->set_pin(std::move(pin));
lba_manager->add_pin(ref->get_pin());
}
}
crimson::get_logger(ceph_subsys_filestore).debug(
"read_extents: mappings {}",
pins);
pins.swap(pin_list_ref);
return crimson::do_for_each(
pin_list_ref.begin(),
pin_list_ref.end(),
[this, &t, &ret_ref](auto &pin) {
crimson::get_logger(ceph_subsys_filestore).debug(
"read_extents: get_extent {}~{}",
pin->get_paddr(),
pin->get_length());
return cache->get_extent<T>(
t,
pin->get_paddr(),
pin->get_length()
).safe_then([this, &pin, &ret_ref](auto ref) mutable
-> read_extent_ertr::future<> {
if (!ref->has_pin()) {
if (pin->has_been_invalidated() || ref->has_been_invalidated()) {
return crimson::ct_error::eagain::make();
} else {
ref->set_pin(std::move(pin));
lba_manager->add_pin(ref->get_pin());
}
}
ret_ref.push_back(std::make_pair(ref->get_laddr(), ref));
crimson::get_logger(ceph_subsys_filestore).debug(
"read_extents: got extent {}",
*ref);
return read_extent_ertr::now();
});
});
}).safe_then([ret=std::move(ret), pin_list=std::move(pin_list)]() mutable {
return read_extent_ret<T>(
read_extent_ertr::ready_future_marker{},
std::move(*ret));
"pin_to_extent: got extent {}",
*ref);
return pin_to_extent_ret<T>(
pin_to_extent_ertr::ready_future_marker{},
std::move(ref));
});
}
/**
 * read_extent
 *
 * Read extent of type T at offset~length
 *
 * The range must resolve to exactly one pin whose paddr is_real();
 * otherwise the pins are logged and an assertion fires.
 */
using read_extent_ertr = get_pins_ertr::extend_ertr<
  SegmentManager::read_ertr>;
template <typename T>
using read_extent_ret = read_extent_ertr::future<
  TCachedExtentRef<T>>;
template <typename T>
read_extent_ret<T> read_extent(
  Transaction &t,
  laddr_t offset,
  extent_len_t length) {
  return get_pins(
    t, offset, length
  ).safe_then([this, &t, offset, length](auto pins) {
    // front() is only evaluated when there is exactly one pin
    const bool single_real_mapping =
      pins.size() == 1 && pins.front()->get_paddr().is_real();
    if (!single_real_mapping) {
      auto &log = crimson::get_logger(ceph_subsys_filestore);
      log.error(
        "TransactionManager::read_extent offset {} len {} got {} extents:",
        offset,
        length,
        pins.size());
      for (auto &pin : pins) {
        log.error("\t{}", *pin);
      }
      ceph_assert(0 == "Should be impossible");
    }
    return pin_to_extent<T>(t, std::move(pins.front()));
  });
}
@ -240,6 +267,31 @@ public:
});
}
/**
 * reserve_region
 *
 * Asks the lba manager to allocate a mapping of len bytes using hint,
 * passing zero_paddr() as the physical address, and returns the
 * resulting pin.
 */
using reserve_extent_ertr = alloc_extent_ertr;
using reserve_extent_ret = reserve_extent_ertr::future<LBAPinRef>;
reserve_extent_ret reserve_region(
Transaction &t,
laddr_t hint,
extent_len_t len) {
return lba_manager->alloc_extent(
t,
hint,
len,
zero_paddr());
}
/**
 * find_hole
 *
 * Forwards to LBAManager::find_hole with the same transaction, hint
 * and length; error and return types are those of the lba manager.
 */
using find_hole_ertr = LBAManager::find_hole_ertr;
using find_hole_ret = LBAManager::find_hole_ret;
find_hole_ret find_hole(
Transaction &t,
laddr_t hint,
extent_len_t len) {
return lba_manager->find_hole(
t,
hint,
len);
}
/* alloc_extents
*
* allocates more than one new blocks of type T.
@ -372,6 +424,10 @@ public:
croot->get_root().collection_root.update(cmroot);
}
/// Block size as reported by the underlying segment manager.
extent_len_t get_block_size() const {
return segment_manager.get_block_size();
}
~TransactionManager();
private:

View File

@ -556,6 +556,44 @@ public:
);
}
/**
 * read_extents
 *
 * Test helper: fetches the pins overlapping offset~length, resolves each
 * one to a TestBlock extent through pin_to_extent, and yields the list of
 * (laddr, extent) pairs in pin order.
 */
auto read_extents(
  Transaction &t,
  laddr_t offset,
  extent_len_t length) {
  // do_with keeps both lists alive until the returned future resolves
  return seastar::do_with(
    lba_pin_list_t(),
    lextent_list_t<TestBlock>(),
    [this, &t, offset, length](auto &pin_list, auto &extents) {
      return tm->get_pins(
        t, offset, length
      ).safe_then([this, &t, &pin_list, &extents](auto mapped) {
        mapped.swap(pin_list);
        logger().debug("read_extents: mappings {}", pin_list);
        return crimson::do_for_each(
          pin_list.begin(),
          pin_list.end(),
          [this, &t, &extents](auto &&pin) {
            logger().debug(
              "read_extents: get_extent {}~{}",
              pin->get_paddr(),
              pin->get_length());
            return tm->pin_to_extent<TestBlock>(
              t,
              std::move(pin)
            ).safe_then([this, &extents](auto extent) mutable {
              extents.push_back(std::make_pair(extent->get_laddr(), extent));
              logger().debug(
                "read_extents: got extent {}",
                *extent);
              return seastar::now();
            });
          }).safe_then([&extents] {
            return std::move(extents);
          });
      });
    });
}
seastar::future<bufferlist> read(
off_t offset,
size_t size) final {
@ -568,7 +606,7 @@ public:
return seastar::do_with(
tm->create_transaction(),
[=, &blret](auto &t) {
return tm->read_extents<TestBlock>(*t, offset, size
return read_extents(*t, offset, size
).safe_then([=, &blret](auto ext_list) mutable {
size_t cur = offset;
for (auto &i: ext_list) {

View File

@ -38,13 +38,13 @@ target_link_libraries(
crimson::gtest
crimson-seastore)
add_executable(unittest-extmap-manager
test_extmap_manager.cc
add_executable(unittest-object-data-handler
test_object_data_handler.cc
../gtest_seastar.cc)
add_ceph_test(unittest-extmap-manager
unittest-extmap-manager --memory 256M --smp 1)
add_ceph_unittest(unittest-object-data-handler
--memory 256M --smp 1)
target_link_libraries(
unittest-extmap-manager
unittest-object-data-handler
crimson::gtest
crimson-seastore
crimson-os

View File

@ -1,286 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "test/crimson/gtest_seastar.h"
#include "test/crimson/seastore/transaction_manager_test_state.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/transaction_manager.h"
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/os/seastore/extentmap_manager.h"
#include "test/crimson/seastore/test_block.h"
using namespace crimson;
using namespace crimson::os;
using namespace crimson::os::seastore;
namespace {
// File-local accessor for the logger obtained via
// crimson::get_logger(ceph_subsys_test).
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_test);
}
}
/**
 * extentmap_manager_test_t
 *
 * Fixture for the extent map manager tests.  Mirrors every successful
 * insert/remove in test_ext_mappings so check_mappings() can compare the
 * manager's answers against what the test believes is stored.
 */
struct extentmap_manager_test_t :
  public seastar_test_suite_t,
  TMTestState {

  ExtentMapManagerRef extmap_manager;

  extentmap_manager_test_t() {}

  seastar::future<> set_up_fut() final {
    return tm_setup().then([this] {
      extmap_manager = extentmap_manager::create_extentmap_manager(*tm);
      return seastar::now();
    });
  }

  seastar::future<> tear_down_fut() final {
    return tm_teardown().then([this] {
      extmap_manager.reset();
      return seastar::now();
    });
  }

  // logical offset -> (laddr, length) the test expects to be mapped
  using test_extmap_t = std::map<uint32_t, lext_map_val_t>;
  test_extmap_t test_ext_mappings;

  /// Adds lo -> val, checks the returned mapping and records it.
  extent_mapping_t insert_extent(
    extmap_root_t &extmap_root,
    Transaction &t,
    uint32_t lo,
    lext_map_val_t val) {
    auto extent = extmap_manager->add_lextent(extmap_root, t, lo, val).unsafe_get0();
    EXPECT_EQ(lo, extent.logical_offset);
    EXPECT_EQ(val.laddr, extent.laddr);
    EXPECT_EQ(val.length, extent.length);
    test_ext_mappings.emplace(extent.logical_offset,
                              lext_map_val_t{extent.laddr, extent.length});
    return extent;
  }

  /// Looks up lo~len and expects the first result to match exactly.
  extent_map_list_t find_extent(
    extmap_root_t &extmap_root,
    Transaction &t,
    uint32_t lo,
    uint32_t len) {
    auto extent = extmap_manager->find_lextent(extmap_root, t, lo, len).unsafe_get0();
    // front() on an empty result is undefined; report a test failure
    // instead of dereferencing it
    EXPECT_FALSE(extent.empty());
    if (!extent.empty()) {
      EXPECT_EQ(lo, extent.front().logical_offset);
      EXPECT_EQ(len, extent.front().length);
    }
    return extent;
  }

  /// Looks up lo~len and expects nothing to be found.
  extent_map_list_t findno_extent(
    extmap_root_t &extmap_root,
    Transaction &t,
    uint32_t lo,
    uint32_t len) {
    auto extent = extmap_manager->find_lextent(extmap_root, t, lo, len).unsafe_get0();
    EXPECT_EQ(extent.empty(), true);
    return extent;
  }

  /// Removes lo -> val, expects success and forgets the recorded mapping.
  void rm_extent(
    extmap_root_t &extmap_root,
    Transaction &t,
    uint32_t lo,
    lext_map_val_t val ) {
    auto ret = extmap_manager->rm_lextent(extmap_root, t, lo, val).unsafe_get0();
    EXPECT_TRUE(ret);
    test_ext_mappings.erase(lo);
  }

  /// Verifies every recorded mapping through transaction t.
  void check_mappings(extmap_root_t &extmap_root, Transaction &t) {
    for (const auto& [lo, ext]: test_ext_mappings){
      const auto ext_list = find_extent(extmap_root, t, lo, ext.length);
      ASSERT_EQ(ext_list.size(), 1);
      const auto& ext_map = ext_list.front();
      EXPECT_EQ(ext.laddr, ext_map.laddr);
      EXPECT_EQ(ext.length, ext_map.length);
    }
  }

  /// Verifies every recorded mapping through a fresh transaction.
  void check_mappings(extmap_root_t &extmap_root) {
    auto t = tm->create_transaction();
    check_mappings(extmap_root, *t);
  }

  /// Restarts the test state and recreates the manager on the new tm.
  void replay() {
    logger().debug("{}: begin", __func__);
    restart();
    extmap_manager = extentmap_manager::create_extentmap_manager(*tm);
    logger().debug("{}: end", __func__);
  }

  /// Submits t, then lets the segment cleaner run until it halts.
  void submit_transaction(TransactionRef &&t) {
    tm->submit_transaction(std::move(t)).unsafe_get0();
    segment_cleaner->run_until_halt().get0();
  }
};
// Insert one extent, remove it in a second transaction, and confirm in a
// third that it stays gone.
TEST_F(extentmap_manager_test_t, basic)
{
  run_async([this] {
    extmap_root_t extmap_root(0, L_ADDR_NULL);
    {
      auto init_txn = tm->create_transaction();
      extmap_root = extmap_manager->initialize_extmap(*init_txn).unsafe_get0();
      submit_transaction(std::move(init_txn));
    }

    uint32_t extent_len = 4096;
    uint32_t offset = 0x1 * extent_len;

    {
      auto txn = tm->create_transaction();
      logger().debug("first transaction");
      [[maybe_unused]] auto inserted =
        insert_extent(extmap_root, *txn, offset, {offset, extent_len});
      [[maybe_unused]] auto found =
        find_extent(extmap_root, *txn, offset, extent_len);
      submit_transaction(std::move(txn));
    }

    {
      auto txn = tm->create_transaction();
      logger().debug("second transaction");
      auto found = find_extent(extmap_root, *txn, offset, extent_len);
      rm_extent(extmap_root, *txn, offset, {found.front().laddr, extent_len});
      [[maybe_unused]] auto gone =
        findno_extent(extmap_root, *txn, offset, extent_len);
      submit_transaction(std::move(txn));
    }

    {
      auto txn = tm->create_transaction();
      logger().debug("third transaction");
      [[maybe_unused]] auto gone =
        findno_extent(extmap_root, *txn, offset, extent_len);
      submit_transaction(std::move(txn));
    }
  });
}
// Insert 40 transactions of 10 consecutive extents each, verifying the
// recorded mappings mid-transaction (occasionally) and after every submit.
TEST_F(extentmap_manager_test_t, force_leafnode_split)
{
  run_async([this] {
    extmap_root_t extmap_root(0, L_ADDR_NULL);
    {
      auto init_txn = tm->create_transaction();
      extmap_root = extmap_manager->initialize_extmap(*init_txn).unsafe_get0();
      submit_transaction(std::move(init_txn));
    }

    uint32_t extent_len = 4096;
    uint32_t next_offset = 0;
    for (unsigned round = 0; round < 40; round++) {
      auto txn = tm->create_transaction();
      logger().debug("opened transaction");
      for (unsigned slot = 0; slot < 10; ++slot) {
        [[maybe_unused]] auto inserted = insert_extent(
          extmap_root, *txn, next_offset, {next_offset, extent_len});
        next_offset += extent_len;
        const bool verify_in_flight = (round % 20 == 0) && (slot == 5);
        if (verify_in_flight) {
          check_mappings(extmap_root, *txn);
        }
      }
      logger().debug("force split submit transaction i = {}", round);
      submit_transaction(std::move(txn));
      check_mappings(extmap_root);
    }
  });
}
// Fill the map with 80 x 5 extents, then remove two out of every three
// recorded mappings, submitting every 10 steps and verifying every 100.
TEST_F(extentmap_manager_test_t, force_leafnode_split_merge)
{
  run_async([this] {
    extmap_root_t extmap_root(0, L_ADDR_NULL);
    {
      auto init_txn = tm->create_transaction();
      extmap_root = extmap_manager->initialize_extmap(*init_txn).unsafe_get0();
      submit_transaction(std::move(init_txn));
    }

    uint32_t extent_len = 4096;
    uint32_t next_offset = 0;
    for (unsigned round = 0; round < 80; round++) {
      auto txn = tm->create_transaction();
      logger().debug("opened split_merge transaction");
      for (unsigned slot = 0; slot < 5; ++slot) {
        [[maybe_unused]] auto inserted = insert_extent(
          extmap_root, *txn, next_offset, {next_offset, extent_len});
        next_offset += extent_len;
        const bool verify_in_flight = (round % 10 == 0) && (slot == 3);
        if (verify_in_flight) {
          check_mappings(extmap_root, *txn);
        }
      }
      logger().debug("submitting transaction");
      submit_transaction(std::move(txn));
      if (round % 50 == 0) {
        check_mappings(extmap_root);
      }
    }

    auto txn = tm->create_transaction();
    int visited = 0;
    auto cursor = test_ext_mappings.begin();
    while (cursor != test_ext_mappings.end()) {
      // copy the entry and step past it first: rm_extent erases it from
      // test_ext_mappings
      auto [offset, val] = *cursor;
      ++cursor;

      if (visited % 3 != 0) {
        rm_extent(extmap_root, *txn, offset, val);
      }
      visited++;

      if (visited % 10 == 0) {
        logger().debug("submitting transaction i= {}", visited);
        submit_transaction(std::move(txn));
        txn = tm->create_transaction();
      }
      if (visited % 100 == 0) {
        logger().debug("check_mappings i= {}", visited);
        check_mappings(extmap_root, *txn);
        check_mappings(extmap_root);
      }
    }
    logger().debug("finally submitting transaction ");
    submit_transaction(std::move(txn));
  });
}
// Same fill/remove pattern as the split_merge test, with replay() calls
// after initialization, after the fill, and after the final removal.
TEST_F(extentmap_manager_test_t, force_leafnode_split_merge_replay)
{
  run_async([this] {
    extmap_root_t extmap_root(0, L_ADDR_NULL);
    {
      auto init_txn = tm->create_transaction();
      extmap_root = extmap_manager->initialize_extmap(*init_txn).unsafe_get0();
      submit_transaction(std::move(init_txn));
      replay();
    }

    uint32_t extent_len = 4096;
    uint32_t next_offset = 0;
    for (unsigned round = 0; round < 50; round++) {
      auto txn = tm->create_transaction();
      logger().debug("opened split_merge transaction");
      for (unsigned slot = 0; slot < 5; ++slot) {
        [[maybe_unused]] auto inserted = insert_extent(
          extmap_root, *txn, next_offset, {next_offset, extent_len});
        next_offset += extent_len;
      }
      logger().debug("submitting transaction");
      submit_transaction(std::move(txn));
    }
    replay();

    auto txn = tm->create_transaction();
    int visited = 0;
    auto cursor = test_ext_mappings.begin();
    while (cursor != test_ext_mappings.end()) {
      // copy the entry and step past it first: rm_extent erases it from
      // test_ext_mappings
      auto [offset, val] = *cursor;
      ++cursor;

      rm_extent(extmap_root, *txn, offset, val);
      visited++;

      if (visited % 10 == 0) {
        logger().debug("submitting transaction i= {}", visited);
        submit_transaction(std::move(txn));
        txn = tm->create_transaction();
      }
      if (visited % 100 == 0) {
        check_mappings(extmap_root);
      }
    }
    logger().debug("finally submitting transaction ");
    submit_transaction(std::move(txn));
    replay();
    check_mappings(extmap_root);
  });
}

View File

@ -0,0 +1,300 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "test/crimson/gtest_seastar.h"
#include "test/crimson/seastore/transaction_manager_test_state.h"
#include "crimson/os/seastore/onode.h"
#include "crimson/os/seastore/object_data_handler.h"
using namespace crimson;
using namespace crimson::os;
using namespace crimson::os::seastore;
namespace {
// File-local accessor for the logger obtained via
// crimson::get_logger(ceph_subsys_test).
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_test);
}
}
/**
 * TestOnode
 *
 * Onode implementation for tests: keeps the layout as a plain member and
 * remembers whether mutable access to it was ever requested.
 */
class TestOnode : public Onode {
  onode_layout_t layout;
  bool dirty = false;

public:
  ~TestOnode() final = default;

  const onode_layout_t &get_layout() const final {
    return layout;
  }

  // the transaction argument is not consulted by this test double
  onode_layout_t &get_mutable_layout(Transaction &) final {
    dirty = true;
    return layout;
  }

  /// True once get_mutable_layout() has been called.
  bool is_dirty() const {
    return dirty;
  }
};
/**
 * object_data_handler_test_t
 *
 * Fixture for the ObjectDataHandler tests.  Every write and truncate is
 * mirrored into known_contents, and read() compares what the handler
 * returns against the matching slice of that reference buffer.
 */
struct object_data_handler_test_t:
  public seastar_test_suite_t,
  TMTestState {
  OnodeRef onode;

  // reference image of the object's expected contents (4MB, see set_up_fut)
  bufferptr known_contents;
  // current logical size of the object as tracked by the test
  extent_len_t size = 0;

  object_data_handler_test_t() {}

  /// Submits t, then lets the segment cleaner run until it halts.
  auto submit_transaction(TransactionRef &&t) {
    return tm->submit_transaction(std::move(t)
    ).safe_then([this] {
      return segment_cleaner->run_until_halt();
    });
  }

  /// Fills offset~len of the reference with fill and writes the same
  /// bytes through the handler within transaction t.
  void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) {
    ceph_assert(offset + len <= known_contents.length());
    size = std::max<extent_len_t>(size, offset + len);
    memset(
      known_contents.c_str() + offset,
      fill,
      len);
    bufferlist bl;
    bl.append(
      bufferptr(
        known_contents,
        offset,
        len));
    return ObjectDataHandler().write(
      ObjectDataHandler::context_t{
        *tm,
        t,
        *onode,
      },
      offset,
      bl).unsafe_get0();
  }

  /// write() in its own transaction, submitted before returning.
  void write(objaddr_t offset, extent_len_t len, char fill) {
    auto t = tm->create_transaction();
    write(*t, offset, len, fill);
    return submit_transaction(std::move(t)).unsafe_get0();
  }

  /// Truncates to offset within t.  The handler is only invoked when the
  /// object shrinks; the tracked size is set to offset either way.
  void truncate(Transaction &t, objaddr_t offset) {
    if (size > offset) {
      memset(
        known_contents.c_str() + offset,
        0,
        size - offset);
      ObjectDataHandler().truncate(
        ObjectDataHandler::context_t{
          *tm,
          t,
          *onode
        },
        offset).unsafe_get0();
    }
    size = offset;
  }

  /// truncate() in its own transaction, submitted before returning.
  void truncate(objaddr_t offset) {
    auto t = tm->create_transaction();
    truncate(*t, offset);
    return submit_transaction(std::move(t)).unsafe_get0();
  }

  /// Reads offset~len through the handler and expects it to equal the
  /// same range of the reference buffer.
  void read(Transaction &t, objaddr_t offset, extent_len_t len) {
    bufferlist bl = ObjectDataHandler().read(
      ObjectDataHandler::context_t{
        *tm,
        t,
        *onode
      },
      offset,
      len).unsafe_get0();
    bufferlist known;
    known.append(
      bufferptr(
        known_contents,
        offset,
        len));
    EXPECT_EQ(bl.length(), known.length());
    EXPECT_EQ(bl, known);
  }

  /// read() in a fresh transaction (never submitted).
  void read(objaddr_t offset, extent_len_t len) {
    auto t = tm->create_transaction();
    read(*t, offset, len);
  }

  /// Reads offset~len with each end independently moved by -fuzz, 0 and
  /// +fuzz (nine reads in total).
  void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) {
    auto fuzzes = std::vector<int32_t>{
      -1 * static_cast<int32_t>(fuzz),
      0,
      static_cast<int32_t>(fuzz)};
    for (auto left_fuzz : fuzzes) {
      for (auto right_fuzz : fuzzes) {
        read(offset + left_fuzz, len - left_fuzz + right_fuzz);
      }
    }
  }

  seastar::future<> set_up_fut() final {
    onode = new TestOnode{};
    known_contents = buffer::create(4<<20 /* 4MB */);
    // Reads of never-written ranges are compared against this buffer, so
    // it has to start out zero-filled; nothing here shows buffer::create
    // doing that, so zero it explicitly.
    memset(known_contents.c_str(), 0, known_contents.length());
    size = 0;
    return tm_setup();
  }

  seastar::future<> tear_down_fut() final {
    onode.reset();
    size = 0;
    return tm_teardown();
  }
};
// One 8K write at 1MB, read back around its boundaries.
TEST_F(object_data_handler_test_t, single_write)
{
  run_async([this] {
    const objaddr_t offset = 1<<20;
    const extent_len_t len = 8<<10;

    write(offset, len, 'c');

    read_near(offset, len, 1);
    read_near(offset, len, 512);
  });
}
// Three adjacent 4K writes around 1MB, read back singly and as a whole.
TEST_F(object_data_handler_test_t, multi_write)
{
  run_async([this] {
    const objaddr_t mid = 1<<20;
    const extent_len_t block = 4<<10;
    const objaddr_t first = mid - block;
    const extent_len_t span = 12<<10;

    write(first, block, 'a');
    write(mid, block, 'b');
    write(mid + block, block, 'c');

    read_near(mid, block, 1);
    read_near(mid, block, 512);

    read_near(first, span, 1);
    read_near(first, span, 512);
  });
}
// Two 4K writes with an unwritten 4K gap between them at 1MB.
TEST_F(object_data_handler_test_t, write_hole)
{
  run_async([this] {
    const objaddr_t hole = 1<<20;
    const extent_len_t block = 4<<10;
    const objaddr_t first = hole - block;
    const extent_len_t span = 12<<10;

    write(first, block, 'a');
    // nothing is written at `hole`
    write(hole + block, block, 'c');

    read_near(hole, block, 1);
    read_near(hole, block, 512);

    read_near(first, span, 1);
    read_near(first, span, 512);
  });
}
// Write the same 4K range twice with different fill bytes.
TEST_F(object_data_handler_test_t, overwrite_single)
{
  run_async([this] {
    const objaddr_t offset = (1<<20);
    const extent_len_t block = 4<<10;

    write(offset, block, 'a');
    write(offset, block, 'c');

    read_near(offset, block, 1);
    read_near(offset, block, 512);
  });
}
// Two adjacent 4K writes, then one 8K write covering both.
TEST_F(object_data_handler_test_t, overwrite_double)
{
  run_async([this] {
    const objaddr_t lower = (1<<20);
    const extent_len_t block = 4<<10;
    const objaddr_t upper = lower + block;
    const extent_len_t both = 8<<10;

    write(lower, block, 'a');
    write(upper, block, 'c');
    write(lower, both, 'b');

    read_near(lower, both, 1);
    read_near(lower, both, 512);

    read_near(lower, block, 1);
    read_near(lower, block, 512);

    read_near(upper, block, 1);
    read_near(upper, block, 512);
  });
}
// One 12K write, then each 4K third overwritten from the last to the
// first, with a read check after every step.
TEST_F(object_data_handler_test_t, overwrite_partial)
{
  run_async([this] {
    const objaddr_t base = (1<<20);
    const extent_len_t block = 4<<10;
    const extent_len_t whole = 12<<10;

    write(base, whole, 'a');
    read_near(base, whole, 1);

    write(base + (8<<10), block, 'b');
    read_near(base, whole, 1);

    write(base + block, block, 'c');
    read_near(base, whole, 1);

    write(base, block, 'd');
    read_near(base, whole, 1);

    read_near(base, whole, 512);

    read_near(base, block, 1);
    read_near(base, block, 512);

    read_near(base + block, block, 1);
    read_near(base + block, block, 512);
  });
}
// Writes whose start and/or end fall off the 4K grid, at three separate
// bases, each followed by reads around the base.
TEST_F(object_data_handler_test_t, unaligned_write)
{
  run_async([this] {
    const extent_len_t kib = 1<<10;
    const extent_len_t window = 12<<10;

    const objaddr_t first_base = 1<<20;
    write(first_base, (4<<10)+(1<<10), 'a');
    read_near(first_base - 4*kib, window, 512);

    const objaddr_t second_base = (1<<20) + (64<<10);
    write(second_base + kib, (4<<10)+(1<<10), 'b');
    read_near(second_base - 4*kib, window, 512);

    const objaddr_t third_base = (1<<20) + (128<<10);
    write(third_base - kib, (4<<10)+(2<<20), 'c');
    read_near(third_base - 4*kib, window, 512);
  });
}
// Same unaligned writes as unaligned_write, but over a range that was
// first filled with 'x'.
TEST_F(object_data_handler_test_t, unaligned_overwrite)
{
  run_async([this] {
    const extent_len_t kib = 1<<10;
    const extent_len_t window = 12<<10;
    const extent_len_t fuzz = 2<<10;

    const objaddr_t first_base = 1<<20;
    write(first_base, (128<<10) + (16<<10), 'x');

    write(first_base, (4<<10)+(1<<10), 'a');
    read_near(first_base - 4*kib, window, fuzz);

    const objaddr_t second_base = (1<<20) + (64<<10);
    write(second_base + kib, (4<<10)+(1<<10), 'b');
    read_near(second_base - 4*kib, window, fuzz);

    const objaddr_t third_base = (1<<20) + (128<<10);
    write(third_base - kib, (4<<10)+(2<<20), 'c');
    read_near(third_base - 4*kib, window, fuzz);

    read(third_base, (128<<10) + (16<<10));
  });
}
// Three 8K writes, then truncations at decreasing offsets (the last one
// below the first write), re-reading the same 64K window after each.
TEST_F(object_data_handler_test_t, truncate)
{
  run_async([this] {
    const objaddr_t base = 1<<20;
    const extent_len_t chunk = 8<<10;
    const extent_len_t window = 64<<10;

    write(base, chunk, 'a');
    write(base + chunk, chunk, 'b');
    write(base + (16<<10), chunk, 'c');

    const objaddr_t cut_points[] = {
      base + (32<<10),
      base + (24<<10),
      base + (12<<10),
      base - (12<<10),
    };
    for (const objaddr_t cut : cut_points) {
      truncate(cut);
      read(base, window);
    }
  });
}

View File

@ -59,6 +59,7 @@ struct seastore_test_t :
const ghobject_t oid;
std::map<string, bufferlist> omap;
bufferlist contents;
void set_omap(
CTransaction &t,
@ -84,6 +85,88 @@ struct seastore_test_t :
std::move(t)).get0();
}
/// Mirrors a write of bl at offset into `contents` (zero-padding any gap
/// before offset and keeping any old bytes past the written range), then
/// queues the write on t.
void write(
  SeaStore &seastore,
  CTransaction &t,
  uint64_t offset,
  bufferlist bl) {
  bufferlist merged;

  // head: old bytes in front of the write, when there are any
  const bool keep_head = offset > 0 && contents.length();
  if (keep_head) {
    merged.substr_of(
      contents,
      0,
      std::min<size_t>(offset, contents.length()));
  }

  // pad up to offset, then the new data
  merged.append_zero(offset - merged.length());
  merged.append(bl);

  // tail: old bytes past the end of the write
  auto write_end = offset + bl.length();
  if (contents.length() > write_end) {
    bufferlist remainder;
    remainder.substr_of(
      contents,
      write_end,
      contents.length() - write_end);
    merged.append(remainder);
  }

  contents.swap(merged);

  t.write(
    cid,
    oid,
    offset,
    bl.length(),
    bl);
}
/// Writes bl at offset in a transaction of its own and waits for it.
void write(
  SeaStore &seastore,
  uint64_t offset,
  bufferlist bl) {
  CTransaction txn;
  write(seastore, txn, offset, bl);
  seastore.do_transaction(coll, std::move(txn)).get0();
}
/// Writes len bytes of `fill` at offset.
void write(
  SeaStore &seastore,
  uint64_t offset,
  size_t len,
  char fill) {
  bufferptr filled(buffer::create(len));
  ::memset(filled.c_str(), fill, len);

  bufferlist payload;
  payload.append(filled);
  write(seastore, offset, payload);
}
/// Reads offset~len from the store and expects it to equal the same
/// range of the mirrored `contents`.
void read(
  SeaStore &seastore,
  uint64_t offset,
  uint64_t len) {
  bufferlist expected;
  expected.substr_of(contents, offset, len);

  auto actual = seastore.read(coll, oid, offset, len).unsafe_get0();

  EXPECT_EQ(actual.length(), expected.length());
  EXPECT_EQ(actual, expected);
}
/// Expects the size reported by stat() to match the mirrored contents.
void check_size(SeaStore &seastore) {
  auto stat_result = seastore.stat(coll, oid).get0();
  EXPECT_EQ(contents.length(), stat_result.st_size);
}
void check_omap_key(
SeaStore &seastore,
const string &key) {
@ -241,3 +324,21 @@ TEST_F(seastore_test_t, omap_test_iterator)
test_obj.check_omap(*seastore);
});
}
// Write 1K of 'a' at offset 1K, read it back, and check the object size.
TEST_F(seastore_test_t, simple_extent_test)
{
  run_async([this] {
    const uint64_t offset = 1024;
    const size_t len = 1024;

    auto &test_obj = get_object(make_oid(0));
    test_obj.write(*seastore, offset, len, 'a');
    test_obj.read(*seastore, offset, len);
    test_obj.check_size(*seastore);
  });
}

View File

@ -424,13 +424,9 @@ struct transaction_manager_test_t :
ceph_assert(test_mappings.contains(addr, t.mapping_delta));
ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len);
auto ret_list = tm->read_extents<TestBlock>(
auto ext = tm->read_extent<TestBlock>(
*t.t, addr, len
).unsafe_get0();
EXPECT_EQ(ret_list.size(), 1);
auto &ext = ret_list.begin()->second;
auto &laddr = ret_list.begin()->first;
EXPECT_EQ(addr, laddr);
EXPECT_EQ(addr, ext->get_laddr());
return ext;
}