crimson/os/seastore/lba_manager: add rewrite_extent

Adds support for writing out an extent to a new location and updating
lba mappings.

Signed-off-by: Samuel Just <sjust@redhat.com>
Samuel Just 2020-08-11 10:47:28 -07:00
parent 59d3dff69e
commit c907e2a11d
8 changed files with 306 additions and 5 deletions
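
Illustrative only, not part of the diff below: a minimal sketch of how a caller holding an open Transaction might drive the new interface. The free function, its name, and its arguments are assumptions for illustration, not code from this commit.

// Hypothetical caller sketch: relocate one extent within an open transaction.
LBAManager::rewrite_extent_ret relocate_extent(
  LBAManager &lba_manager,
  Transaction &t,
  CachedExtentRef extent)
{
  // Copies the extent to a fresh location in t and repoints its lba mapping;
  // the retirement of the old copy and the updated mapping commit with t.
  return lba_manager.rewrite_extent(t, extent);
}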


@@ -115,6 +115,36 @@ void Cache::replace_extent(CachedExtentRef next, CachedExtentRef prev)
}
}
CachedExtentRef Cache::alloc_new_extent_by_type(
Transaction &t, ///< [in, out] current transaction
extent_types_t type, ///< [in] type tag
segment_off_t length ///< [in] length
)
{
switch (type) {
case extent_types_t::ROOT:
assert(0 == "ROOT is never directly alloc'd");
return CachedExtentRef();
case extent_types_t::LADDR_INTERNAL:
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length);
case extent_types_t::LADDR_LEAF:
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length);
case extent_types_t::ONODE_BLOCK:
return alloc_new_extent<OnodeBlock>(t, length);
case extent_types_t::TEST_BLOCK:
return alloc_new_extent<TestBlock>(t, length);
case extent_types_t::TEST_BLOCK_PHYSICAL:
return alloc_new_extent<TestBlockPhysical>(t, length);
case extent_types_t::NONE: {
ceph_assert(0 == "NONE is an invalid extent type");
return CachedExtentRef();
}
default:
ceph_assert(0 == "impossible");
return CachedExtentRef();
}
}
CachedExtentRef Cache::duplicate_for_write(
Transaction &t,
CachedExtentRef i) {


@@ -244,6 +244,17 @@ public:
return ret;
}
/**
* alloc_new_extent_by_type
*
* Allocates a fresh extent. addr will be relative until commit.
*/
CachedExtentRef alloc_new_extent_by_type(
Transaction &t, ///< [in, out] current transaction
extent_types_t type, ///< [in] type tag
segment_off_t length ///< [in] length
);
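// Illustration only, not part of this hunk; it mirrors the
// BtreeLBAManager::rewrite_extent hunk later in this commit. 'cache', 't',
// and 'lextent' are assumed to exist in the caller: allocate a replacement
// extent with the same type tag and length, then downcast to reach its buffer.
//
//   auto nlextent = cache.alloc_new_extent_by_type(
//     t,
//     lextent->get_type(),
//     lextent->get_length())->cast<LogicalCachedExtent>();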
/**
* Allocates mutable buffer from extent_set on offset~len
*
@@ -354,6 +365,31 @@ public:
});
}
/**
* update_extent_from_transaction
*
* Updates passed extent based on t. If extent has been retired,
* a null result will be returned.
*/
CachedExtentRef update_extent_from_transaction(
Transaction &t,
CachedExtentRef extent) {
if (extent->get_type() == extent_types_t::ROOT) {
if (t.root) {
return t.root;
} else {
return extent;
}
} else {
auto result = t.get_extent(extent->get_paddr(), &extent);
if (result == Transaction::get_extent_ret::RETIRED) {
return CachedExtentRef();
} else {
return extent;
}
}
}
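// Usage sketch, not part of this hunk; 'cache', 't', and 'extent' are assumed
// to exist in the caller. Re-resolve a cached extent against an open
// transaction before reading or mutating it:
//
//   if (auto cur = cache.update_extent_from_transaction(t, extent); cur) {
//     // 'cur' is the version visible to t (t.root or a duplicate recorded in t)
//   } else {
//     // extent was retired within t; skip it
//   }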
/**
* print
*


@@ -135,6 +135,20 @@ public:
Transaction &t,
CachedExtentRef e) = 0;
/**
* rewrite_extent
*
* Rewrites the passed extent into a new location within t,
* updating the lba mapping to point at the new copy.
*/
using rewrite_extent_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
using rewrite_extent_ret = rewrite_extent_ertr::future<>;
virtual rewrite_extent_ret rewrite_extent(
Transaction &t,
CachedExtentRef extent) = 0;
virtual void add_pin(LBAPin &pin) = 0;
virtual ~LBAManager() {}


@@ -293,6 +293,82 @@ BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
});
}
BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
Transaction &t,
CachedExtentRef extent)
{
if (extent->is_logical()) {
auto lextent = extent->cast<LogicalCachedExtent>();
cache.retire_extent(t, extent);
auto nlextent = cache.alloc_new_extent_by_type(
t,
lextent->get_type(),
lextent->get_length())->cast<LogicalCachedExtent>();
lextent->get_bptr().copy_out(
0,
lextent->get_length(),
nlextent->get_bptr().c_str());
nlextent->set_laddr(lextent->get_laddr());
nlextent->set_pin(lextent->get_pin().duplicate());
logger().debug(
"{}: rewriting {} into {}",
__func__,
*lextent,
*nlextent);
return update_mapping(
t,
lextent->get_laddr(),
[prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()](
const lba_map_val_t &in) {
lba_map_val_t ret = in;
ceph_assert(in.paddr == prev_addr);
ret.paddr = addr;
return ret;
}).safe_then([nlextent](auto e) {}).handle_error(
rewrite_extent_ertr::pass_further{},
/* ENOENT in particular should be impossible */
crimson::ct_error::assert_all{}
);
} else if (is_lba_node(*extent)) {
auto lba_extent = extent->cast<LBANode>();
cache.retire_extent(t, extent);
auto nlba_extent = cache.alloc_new_extent_by_type(
t,
lba_extent->get_type(),
lba_extent->get_length())->cast<LBANode>();
lba_extent->get_bptr().copy_out(
0,
lba_extent->get_length(),
nlba_extent->get_bptr().c_str());
nlba_extent->pin.set_range(nlba_extent->get_node_meta());
/* This is a bit underhanded. Any relative addrs here must necessarily
* be record relative as we are rewriting a dirty extent. Thus, we
* are using resolve_relative_addrs with a (likely negative) block
* relative offset to correct them to block-relative offsets adjusted
* for our new transaction location.
*
* Upon commit, these now block-relative addresses will be interpreted
* against the real final address.
*/
nlba_extent->resolve_relative_addrs(
make_record_relative_paddr(0) - nlba_extent->get_paddr());
return update_internal_mapping(
t,
nlba_extent->get_node_meta().depth,
nlba_extent->get_node_meta().begin,
nlba_extent->get_paddr()).safe_then(
[](auto) {},
rewrite_extent_ertr::pass_further {},
crimson::ct_error::assert_all{});
} else {
return rewrite_extent_ertr::now();
}
}
BtreeLBAManager::BtreeLBAManager(
SegmentManager &segment_manager,
Cache &cache)
@@ -373,4 +449,48 @@ BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
});
}
BtreeLBAManager::update_internal_mapping_ret
BtreeLBAManager::update_internal_mapping(
Transaction &t,
depth_t depth,
laddr_t laddr,
paddr_t paddr)
{
return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) {
if (depth == croot->get_lba_root().lba_depth) {
logger().debug(
"update_internal_mapping: updating lba root to: {}->{}",
laddr,
paddr);
{
auto mut_croot = cache.duplicate_for_write(t, croot);
croot = mut_croot->cast<RootBlock>();
}
ceph_assert(laddr == 0);
auto old_paddr = croot->get_lba_root().lba_root_addr;
croot->get_lba_root().lba_root_addr = paddr;
return update_internal_mapping_ret(
update_internal_mapping_ertr::ready_future_marker{},
old_paddr);
} else {
logger().debug(
"update_internal_mapping: updating lba node at depth {} to: {}->{}",
depth,
laddr,
paddr);
return get_lba_btree_extent(
get_context(t),
croot->get_lba_root().lba_depth,
croot->get_lba_root().lba_root_addr,
paddr_t()).safe_then([=, &t](LBANodeRef broot) {
return broot->mutate_internal_address(
get_context(t),
depth,
laddr,
paddr);
});
}
});
}
}


@@ -87,6 +87,10 @@ public:
Transaction &t,
CachedExtentRef e) final;
rewrite_extent_ret rewrite_extent(
Transaction &t,
CachedExtentRef extent);
void add_pin(LBAPin &pin) final {
pin_set.add_pin(reinterpret_cast<BtreeLBAPin*>(&pin)->pin);
}
@@ -151,6 +155,14 @@ private:
Transaction &t,
laddr_t addr,
update_func_t &&f);
using update_internal_mapping_ertr = LBANode::mutate_internal_address_ertr;
using update_internal_mapping_ret = LBANode::mutate_internal_address_ret;
update_internal_mapping_ret update_internal_mapping(
Transaction &t,
depth_t depth,
laddr_t laddr,
paddr_t paddr);
};
using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;


@@ -138,6 +138,25 @@ struct LBANode : CachedExtent {
laddr_t laddr,
mutate_func_t &&f) = 0;
/**
* mutate_internal_address
*
* Looks up internal node mapping at laddr, depth and
* updates the mapping to paddr. Returns previous paddr
* (for debugging purposes).
*/
using mutate_internal_address_ertr = crimson::errorator<
crimson::ct_error::enoent, ///< mapping does not exist
crimson::ct_error::input_output_error
>;
using mutate_internal_address_ret = mutate_internal_address_ertr::future<
paddr_t>;
virtual mutate_internal_address_ret mutate_internal_address(
op_context_t c,
depth_t depth,
laddr_t laddr,
paddr_t paddr) = 0;
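// Illustrative call sketch, not part of this hunk; it mirrors the pattern
// BtreeLBAManager::update_internal_mapping uses elsewhere in this commit.
// 'root_node', 'c', 'depth', 'laddr', and 'new_paddr' are assumed.
//
//   return root_node->mutate_internal_address(
//     c,
//     depth,
//     laddr,
//     new_paddr
//   ).safe_then([](paddr_t old_paddr) {
//     // old_paddr is the prior mapping, returned for debugging only
//   });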
/**
* make_split_children
*
@@ -201,7 +220,6 @@ struct LBANode : CachedExtent {
resolve_relative_addrs(get_paddr());
}
protected:
virtual void resolve_relative_addrs(paddr_t base) = 0;
};
using LBANodeRef = LBANode::LBANodeRef;


@@ -39,12 +39,11 @@ LBAInternalNode::lookup_ret LBAInternalNode::lookup(
}
assert(meta.begin <= addr);
assert(meta.end > addr);
auto [begin, end] = bound(addr, 0);
assert(begin == end + 1);
auto iter = lower_bound(addr);
return get_lba_btree_extent(
c,
meta.depth,
begin->get_val(),
meta.depth - 1,
iter->get_val(),
get_paddr()).safe_then([c, addr, depth](auto child) {
return child->lookup(c, addr, depth);
});
@@ -131,6 +130,54 @@ LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping(
});
}
LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address(
op_context_t c,
depth_t depth,
laddr_t laddr,
paddr_t paddr)
{
if (get_meta().depth == (depth + 1)) {
if (!is_pending()) {
return c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>(
)->mutate_internal_address(
c,
depth,
laddr,
paddr);
}
auto iter = get_containing_child(laddr);
if (iter->get_key() != laddr) {
return crimson::ct_error::enoent::make();
}
auto old_paddr = iter->get_val();
journal_update(
iter,
maybe_generate_relative(paddr),
maybe_get_delta_buffer());
return mutate_internal_address_ret(
mutate_internal_address_ertr::ready_future_marker{},
old_paddr
);
} else {
auto iter = get_containing_child(laddr);
return get_lba_btree_extent(
c,
get_meta().depth - 1,
iter->get_val(),
get_paddr()
).safe_then([=](auto node) {
return node->mutate_internal_address(
c,
depth,
laddr,
paddr);
});
}
}
LBAInternalNode::find_hole_ret LBAInternalNode::find_hole(
op_context_t c,
laddr_t min,
@@ -434,6 +481,18 @@ LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping(
}
}
LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address(
op_context_t c,
depth_t depth,
laddr_t laddr,
paddr_t paddr)
{
ceph_assert(0 == "Impossible");
return mutate_internal_address_ret(
mutate_internal_address_ertr::ready_future_marker{},
paddr);
}
LBALeafNode::find_hole_ret LBALeafNode::find_hole(
op_context_t c,
laddr_t min,


@@ -109,6 +109,12 @@ struct LBAInternalNode
laddr_t laddr,
mutate_func_t &&f) final;
mutate_internal_address_ret mutate_internal_address(
op_context_t c,
depth_t depth,
laddr_t laddr,
paddr_t paddr) final;
find_hole_ret find_hole(
op_context_t c,
laddr_t min,
@@ -348,6 +354,12 @@ struct LBALeafNode
laddr_t laddr,
mutate_func_t &&f) final;
mutate_internal_address_ret mutate_internal_address(
op_context_t c,
depth_t depth,
laddr_t laddr,
paddr_t paddr) final;
find_hole_ret find_hole(
op_context_t c,
laddr_t min,