crimson/os/seastore/lba_manager: link lba leaf nodes with logical extents by pointers

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
This commit is contained in:
Xuehan Xu 2022-10-25 14:03:43 +08:00
parent 55e1924e38
commit cce850d756
22 changed files with 497 additions and 156 deletions

View File

@ -18,6 +18,8 @@ set(crimson_seastore_srcs
omap_manager.cc
omap_manager/btree/btree_omap_manager.cc
omap_manager/btree/omap_btree_node_impl.cc
btree/btree_range_pin.cc
btree/fixed_kv_node.cc
onode.cc
onode_manager/staged-fltree/node.cc
onode_manager/staged-fltree/node_extent_manager.cc

View File

@ -92,7 +92,8 @@ public:
const_iterator insert(
const_iterator iter,
paddr_t key,
backref_map_val_t val) final {
backref_map_val_t val,
LogicalCachedExtent*) final {
journal_insert(
iter,
key,
@ -103,7 +104,8 @@ public:
void update(
const_iterator iter,
backref_map_val_t val) final {
backref_map_val_t val,
LogicalCachedExtent*) final {
return journal_update(
iter,
val,

View File

@ -242,7 +242,8 @@ BtreeBackrefManager::new_mapping(
c,
*state.insert_iter,
state.last_end,
val
val,
nullptr
).si_then([&state, c, addr, len, key](auto &&p) {
LOG_PREFIX(BtreeBackrefManager::new_mapping);
auto [iter, inserted] = std::move(p);

View File

@ -17,10 +17,12 @@ public:
BtreeBackrefPin() = default;
BtreeBackrefPin(
CachedExtentRef parent,
uint16_t pos,
backref_map_val_t &val,
backref_node_meta_t &&meta)
: BtreeNodePin(
parent,
pos,
val.laddr,
val.len,
std::forward<backref_node_meta_t>(meta)),

View File

@ -0,0 +1,36 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "crimson/os/seastore/btree/btree_range_pin.h"
#include "crimson/os/seastore/btree/fixed_kv_node.h"
namespace crimson::os::seastore {
/**
 * link_extent
 *
 * Associates the logical extent `ref` with this pin.
 *
 * When `ref` is neither pending nor "exist clean", it is additionally
 * registered as a child of a btree node at slot `pos`. The node is chosen
 * from the state of `parent`:
 *  - initial pending:  the node returned by get_stable_for_key() for the
 *                      beginning of this pin's range
 *  - mutation pending: parent's prior instance
 *  - otherwise:        parent itself
 * After linking, `pos` is reset to its "unset" value (uint16_t max).
 *
 * In every case the extent is finally recorded in the range pin.
 *
 * NOTE(review): `pos` is passed unchanged to link_child() on the stable
 * node / prior instance -- confirm it is also the correct slot index there.
 */
template <typename key_t, typename val_t>
void BtreeNodePin<key_t, val_t>::link_extent(LogicalCachedExtent *ref) {
  using node_t = FixedKVNode<key_t>;
  constexpr auto UNSET_POS = std::numeric_limits<uint16_t>::max();

  assert(ref->is_valid());
  // it's only when reading logical extents from disk that we need to
  // link them to lba leaves
  const bool needs_linking = !ref->is_pending() && !ref->is_exist_clean();
  if (needs_linking) {
    assert(parent);
    assert(pos != UNSET_POS);
    if (parent->is_initial_pending()) {
      static_cast<node_t&>(*parent)
	.get_stable_for_key(pin.range.begin)
	.link_child(ref, pos);
    } else if (parent->is_mutation_pending()) {
      static_cast<node_t&>(*parent->get_prior_instance())
	.link_child(ref, pos);
    } else {
      assert(!parent->is_pending() && parent->is_valid());
      static_cast<node_t&>(*parent).link_child(ref, pos);
    }
    // the slot has been consumed, make sure it is not reused
    pos = UNSET_POS;
  }
  pin.set_extent(ref);
}
template void BtreeNodePin<laddr_t, paddr_t>::link_extent(LogicalCachedExtent*);
template void BtreeNodePin<paddr_t, laddr_t>::link_extent(LogicalCachedExtent*);
} // namespace crimson::os::seastore

View File

@ -453,6 +453,7 @@ class BtreeNodePin : public PhysicalNodePin<key_t, val_t> {
val_t value;
extent_len_t len;
btree_range_pin_t<key_t> pin;
uint16_t pos = std::numeric_limits<uint16_t>::max();
public:
using val_type = val_t;
@ -460,13 +461,18 @@ public:
BtreeNodePin(
CachedExtentRef parent,
uint16_t pos,
val_t &value,
extent_len_t len,
fixed_kv_node_meta_t<key_t> &&meta)
: parent(parent), value(value), len(len) {
: parent(parent), value(value), len(len), pos(pos) {
pin.set_range(std::move(meta));
}
CachedExtentRef get_parent() const final {
return parent;
}
btree_range_pin_t<key_t>& get_range_pin() {
return pin;
}
@ -479,9 +485,7 @@ public:
parent = pin;
}
void link_extent(LogicalCachedExtent *ref) final {
pin.set_extent(ref);
}
void link_extent(LogicalCachedExtent *ref) final;
extent_len_t get_length() const final {
ceph_assert(pin.range.end > pin.range.begin);

View File

@ -15,12 +15,16 @@
#include "crimson/os/seastore/btree/btree_range_pin.h"
#include "crimson/os/seastore/root_block.h"
#define RESERVATION_PTR reinterpret_cast<ChildableCachedExtent*>(0x1)
namespace crimson::os::seastore::lba_manager::btree {
struct lba_map_val_t;
}
namespace crimson::os::seastore {
bool is_valid_child_ptr(ChildableCachedExtent* child);
template <typename T>
phy_tree_root_t& get_phy_tree_root(root_t& r);
@ -223,6 +227,7 @@ public:
auto key = get_key();
return std::make_unique<pin_t>(
leaf.node,
leaf.pos,
val,
fixed_kv_node_meta_t<node_key_t>{ key, key + val.len, 0 });
}
@ -545,7 +550,8 @@ public:
op_context_t<node_key_t> c,
iterator iter,
node_key_t laddr,
node_val_t val
node_val_t val,
LogicalCachedExtent* nextent
) {
LOG_PREFIX(FixedKVBtree::insert);
SUBTRACET(
@ -556,10 +562,10 @@ public:
iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
return seastar::do_with(
iter,
[this, c, laddr, val](auto &ret) {
[this, c, laddr, val, nextent](auto &ret) {
return find_insertion(
c, laddr, ret
).si_then([this, c, laddr, val, &ret] {
).si_then([this, c, laddr, val, &ret, nextent] {
if (!ret.at_boundary() && ret.get_key() == laddr) {
return insert_ret(
interruptible::ready_future_marker{},
@ -568,7 +574,7 @@ public:
++(get_tree_stats<self_type>(c.trans).num_inserts);
return handle_split(
c, ret
).si_then([c, laddr, val, &ret] {
).si_then([c, laddr, val, &ret, nextent] {
if (!ret.leaf.node->is_mutable()) {
CachedExtentRef mut = c.cache.duplicate_for_write(
c.trans, ret.leaf.node
@ -581,7 +587,7 @@ public:
assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
assert(laddr >= ret.leaf.node->get_meta().begin &&
laddr < ret.leaf.node->get_meta().end);
ret.leaf.node->insert(iter, laddr, val);
ret.leaf.node->insert(iter, laddr, val, nextent);
return insert_ret(
interruptible::ready_future_marker{},
std::make_pair(ret, true));
@ -594,11 +600,12 @@ public:
insert_ret insert(
op_context_t<node_key_t> c,
node_key_t laddr,
node_val_t val) {
node_val_t val,
LogicalCachedExtent* nextent) {
return lower_bound(
c, laddr
).si_then([this, c, laddr, val](auto iter) {
return this->insert(c, iter, laddr, val);
).si_then([this, c, laddr, val, nextent](auto iter) {
return this->insert(c, iter, laddr, val, nextent);
});
}
@ -617,7 +624,8 @@ public:
update_ret update(
op_context_t<node_key_t> c,
iterator iter,
node_val_t val)
node_val_t val,
LogicalCachedExtent* nextent)
{
LOG_PREFIX(FixedKVBtree::update);
SUBTRACET(
@ -634,7 +642,8 @@ public:
++(get_tree_stats<self_type>(c.trans).num_updates);
iter.leaf.node->update(
iter.leaf.node->iter_idx(iter.leaf.pos),
val);
val,
nextent);
return update_ret(
interruptible::ready_future_marker{},
iter);

View File

@ -0,0 +1,12 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#include "crimson/os/seastore/btree/fixed_kv_node.h"
namespace crimson::os::seastore {
// Returns true only when `child` is neither null nor the RESERVATION_PTR
// placeholder value.
bool is_valid_child_ptr(ChildableCachedExtent* child) {
  if (child == nullptr) {
    return false;
  }
  return child != RESERVATION_PTR;
}
} // namespace crimson::os::seastore

View File

@ -28,25 +28,8 @@ namespace crimson::os::seastore {
* Base class enabling recursive lookup between internal and leaf nodes.
*/
template <typename node_key_t>
struct FixedKVNode : CachedExtent {
struct FixedKVNode : ChildableCachedExtent {
using FixedKVNodeRef = TCachedExtentRef<FixedKVNode>;
struct parent_tracker_t
: public boost::intrusive_ref_counter<
parent_tracker_t, boost::thread_unsafe_counter> {
parent_tracker_t(FixedKVNodeRef parent)
: parent(parent) {}
parent_tracker_t(FixedKVNode* parent)
: parent(parent) {}
FixedKVNodeRef parent = nullptr;
~parent_tracker_t() {
// this is parent's tracker, reset it
if (parent->my_tracker == this) {
parent->my_tracker = nullptr;
}
}
};
using parent_tracker_ref = boost::intrusive_ptr<parent_tracker_t>;
btree_range_pin_t<node_key_t> pin;
struct copy_source_cmp_t {
@ -98,20 +81,24 @@ struct FixedKVNode : CachedExtent {
* its "prior_instance" if the node is the result of a rewrite), with which
* the lba range of this node overlaps.
*/
std::vector<CachedExtent*> children;
std::vector<ChildableCachedExtent*> children;
std::set<FixedKVNodeRef, copy_source_cmp_t> copy_sources;
uint16_t capacity = 0;
parent_tracker_t* my_tracker = nullptr;
parent_tracker_ref parent_tracker;
RootBlockRef root_block;
bool is_linked() {
assert(!has_parent_tracker() || !(bool)root_block);
return (bool)has_parent_tracker() || (bool)root_block;
}
FixedKVNode(uint16_t capacity, ceph::bufferptr &&ptr)
: CachedExtent(std::move(ptr)),
: ChildableCachedExtent(std::move(ptr)),
pin(this),
children(capacity, nullptr),
capacity(capacity) {}
FixedKVNode(const FixedKVNode &rhs)
: CachedExtent(rhs),
: ChildableCachedExtent(rhs),
pin(rhs.pin, this),
children(rhs.capacity, nullptr),
capacity(rhs.capacity) {}
@ -128,6 +115,8 @@ struct FixedKVNode : CachedExtent {
set_child_ptracker(child);
}
virtual bool is_leaf_and_has_children() const = 0;
template<typename iter_t>
void insert_child_ptr(iter_t iter, ChildableCachedExtent* child) {
auto raw_children = children.data();
@ -136,8 +125,18 @@ struct FixedKVNode : CachedExtent {
&raw_children[offset + 1],
&raw_children[offset],
(get_node_size() - offset) * sizeof(ChildableCachedExtent*));
children[offset] = child;
set_child_ptracker(child);
if (child) {
children[offset] = child;
set_child_ptracker(child);
} else {
// this can only happen when reserving lba spaces
ceph_assert(is_leaf_and_has_children());
// this is to avoid mistakenly copying pointers from
// copy sources when committing this lba node, because
// we rely on pointers' "nullness" to avoid copying
// pointers for updated values
children[offset] = RESERVATION_PTR;
}
}
template<typename iter_t>
@ -227,7 +226,7 @@ struct FixedKVNode : CachedExtent {
: stable_parent(stable_parent), pos(pos) {}
};
void link_child(FixedKVNode* child, uint16_t pos) {
void link_child(ChildableCachedExtent* child, uint16_t pos) {
assert(pos < get_node_size());
assert(child);
ceph_assert(!is_pending());
@ -242,14 +241,14 @@ struct FixedKVNode : CachedExtent {
auto pos = iter.get_offset();
assert(children.capacity());
auto child = children[pos];
if (child) {
if (is_valid_child_ptr(child)) {
return child_pos_t(child->get_transactional_view(t));
} else if (is_pending()) {
auto key = iter.get_key();
auto &sparent = get_stable_for_key(key);
auto spos = sparent.child_pos_for_key(key);
auto child = sparent.children[spos];
if (child) {
if (is_valid_child_ptr(child)) {
return child_pos_t(child->get_transactional_view(t));
} else {
return child_pos_t(&sparent, spos);
@ -357,10 +356,9 @@ struct FixedKVNode : CachedExtent {
return;
}
ceph_assert(!root_block);
parent_tracker = prior.parent_tracker;
auto &parent = parent_tracker->parent;
assert(parent);
assert(parent->is_valid());
take_prior_parent_tracker();
assert(is_parent_valid());
auto parent = get_parent_node<FixedKVNode>();
//TODO: can this search be avoided?
auto off = parent->lower_bound_offset(get_node_meta().begin);
assert(parent->get_key_from_idx(off) == get_node_meta().begin);
@ -385,7 +383,7 @@ struct FixedKVNode : CachedExtent {
assert(prior.my_tracker || prior.is_children_empty());
if (prior.my_tracker) {
prior.my_tracker->parent.reset(this);
prior.my_tracker->reset_parent(this);
my_tracker = prior.my_tracker;
// All my initial pending children are pointing to the original
// tracker which has been dropped by the above line, so need
@ -401,8 +399,8 @@ struct FixedKVNode : CachedExtent {
ceph_assert(end <= children.end());
for (auto it = begin; it != end; it++) {
auto child = *it;
if (child) {
set_child_ptracker((FixedKVNode*)child);
if (is_valid_child_ptr(child)) {
set_child_ptracker(child);
}
}
}
@ -485,7 +483,7 @@ struct FixedKVNode : CachedExtent {
}
void on_invalidated(Transaction &t) final {
parent_tracker.reset();
reset_parent_tracker();
}
bool is_rewrite() {
@ -495,17 +493,17 @@ struct FixedKVNode : CachedExtent {
void on_initial_write() final {
// All in-memory relative addrs are necessarily block-relative
resolve_relative_addrs(get_paddr());
ceph_assert(
parent_tracker
? (parent_tracker->parent && parent_tracker->parent->is_valid())
: true);
if (pin.is_root()) {
reset_parent_tracker();
}
assert(has_parent_tracker() ? (is_parent_valid()) : true);
}
void set_child_ptracker(FixedKVNode *child) {
if (!my_tracker) {
my_tracker = new parent_tracker_t(this);
void set_child_ptracker(ChildableCachedExtent *child) {
if (!this->my_tracker) {
this->my_tracker = new parent_tracker_t(this);
}
child->parent_tracker.reset(my_tracker);
child->reset_parent_tracker(this->my_tracker);
}
void on_clean_read() final {
@ -564,6 +562,10 @@ struct FixedKVInternalNode
: FixedKVNode<NODE_KEY>(rhs),
node_layout_t(this->get_bptr().c_str()) {}
bool is_leaf_and_has_children() const final {
return false;
}
uint16_t get_node_split_pivot() final {
return this->get_split_pivot().get_offset();
}
@ -617,9 +619,8 @@ struct FixedKVInternalNode
ceph_assert(this->root_block);
unlink_phy_tree_root_node<NODE_KEY>(this->root_block);
} else {
ceph_assert(this->parent_tracker);
auto &parent = this->parent_tracker->parent;
ceph_assert(parent);
ceph_assert(this->is_parent_valid());
auto parent = this->template get_parent_node<FixedKVNode<NODE_KEY>>();
auto off = parent->lower_bound_offset(this->get_meta().begin);
assert(parent->get_key_from_idx(off) == this->get_meta().begin);
assert(parent->children[off] == this);
@ -853,17 +854,13 @@ struct FixedKVInternalNode
}
}
std::ostream &print_detail(std::ostream &out) const
std::ostream &_print_detail(std::ostream &out) const
{
out << ", size=" << this->get_size()
<< ", meta=" << this->get_meta()
<< ", parent_tracker=" << (void*)this->parent_tracker.get();
if (this->parent_tracker) {
out << ", parent=" << (void*)this->parent_tracker->parent.get();
}
out << ", my_tracker=" << (void*)this->my_tracker;
<< ", my_tracker=" << (void*)this->my_tracker;
if (this->my_tracker) {
out << ", my_tracker->parent=" << (void*)this->my_tracker->parent.get();
out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
}
return out << ", root_block=" << (void*)this->root_block.get();
}
@ -936,8 +933,18 @@ struct FixedKVLeafNode
VAL,
VAL_LE>;
using internal_const_iterator_t = typename node_layout_t::const_iterator;
using this_type_t = FixedKVLeafNode<
CAPACITY,
NODE_KEY,
NODE_KEY_LE,
VAL,
VAL_LE,
node_size,
node_type_t,
has_children>;
using base_t = FixedKVNode<NODE_KEY>;
FixedKVLeafNode(ceph::bufferptr &&ptr)
: FixedKVNode<NODE_KEY>(0, std::move(ptr)),
: FixedKVNode<NODE_KEY>(has_children ? CAPACITY : 0, std::move(ptr)),
node_layout_t(this->get_bptr().c_str()) {}
FixedKVLeafNode(const FixedKVLeafNode &rhs)
: FixedKVNode<NODE_KEY>(rhs),
@ -945,11 +952,15 @@ struct FixedKVLeafNode
static constexpr bool do_has_children = has_children;
bool is_leaf_and_has_children() const final {
return has_children;
}
uint16_t get_node_split_pivot() final {
return this->get_split_pivot().get_offset();
}
bool validate_stable_children() final {
bool validate_stable_children() override {
return true;
}
@ -959,9 +970,8 @@ struct FixedKVLeafNode
ceph_assert(this->root_block);
unlink_phy_tree_root_node<NODE_KEY>(this->root_block);
} else {
ceph_assert(this->parent_tracker);
auto &parent = this->parent_tracker->parent;
ceph_assert(parent);
ceph_assert(this->is_parent_valid());
auto parent = this->template get_parent_node<FixedKVNode<NODE_KEY>>();
auto off = parent->lower_bound_offset(this->get_meta().begin);
assert(parent->get_key_from_idx(off) == this->get_meta().begin);
assert(parent->children[off] == this);
@ -970,9 +980,49 @@ struct FixedKVLeafNode
}
}
void on_replace_prior(Transaction &t) final {
this->set_parent_tracker();
assert(this->mutate_state.empty());
void prepare_write() final {
if constexpr (has_children) {
if (this->is_initial_pending()) {
if (this->is_rewrite()) {
this->set_children_from_prior_instance();
}
this->copy_children_from_stable_sources(
[this](base_t &node, uint16_t pos) {
ceph_assert(node.get_type() == this->get_type());
auto &n = static_cast<this_type_t&>(node);
return n.iter_idx(pos);
}
);
if (this->is_rewrite()) {
this->reset_prior_instance();
} else {
this->adjust_ptracker_for_children();
}
assert(this->validate_stable_children());
this->copy_sources.clear();
}
}
assert(this->is_initial_pending()
? this->copy_sources.empty():
true);
}
/**
 * on_replace_prior
 *
 * Must not be called on a rewritten node. For leaves that track children,
 * first takes the child pointers of the prior instance and copies the
 * children of the prior instance's whole range into this node; in all
 * cases the parent tracker is then taken over from the prior instance.
 */
void on_replace_prior(Transaction&) final {
  ceph_assert(!this->is_rewrite());
  if constexpr (has_children) {
    this->set_children_from_prior_instance();
    auto &stable = static_cast<this_type_t&>(*this->get_prior_instance());
    auto ncopied = this->copy_children_from_stable_source(
      stable,
      stable.begin(),
      stable.end(),
      this->begin());
    ceph_assert(ncopied <= get_node_size());
    assert(this->validate_stable_children());
  }
  // done for both kinds of leaves, after any child pointer handling
  this->set_parent_tracker_from_prior_instance();
}
uint16_t lower_bound_offset(NODE_KEY key) const final {
@ -1011,11 +1061,13 @@ struct FixedKVLeafNode
virtual void update(
internal_const_iterator_t iter,
VAL val) = 0;
VAL val,
LogicalCachedExtent* nextent) = 0;
virtual internal_const_iterator_t insert(
internal_const_iterator_t iter,
NODE_KEY addr,
VAL val) = 0;
VAL val,
LogicalCachedExtent* nextent) = 0;
virtual void remove(internal_const_iterator_t iter) = 0;
std::tuple<Ref, Ref, NODE_KEY>
@ -1024,6 +1076,9 @@ struct FixedKVLeafNode
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
auto right = c.cache.template alloc_new_extent<node_type_t>(
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
if constexpr (has_children) {
this->split_child_ptrs(*left, *right);
}
auto pivot = this->split_into(*left, *right);
left->pin.set_range(left->get_meta());
right->pin.set_range(right->get_meta());
@ -1038,6 +1093,9 @@ struct FixedKVLeafNode
Ref &right) {
auto replacement = c.cache.template alloc_new_extent<node_type_t>(
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
if constexpr (has_children) {
replacement->merge_child_ptrs(*this, *right);
}
replacement->merge_from(*this, *right->template cast<node_type_t>());
replacement->pin.set_range(replacement->get_meta());
return replacement;
@ -1061,6 +1119,14 @@ struct FixedKVLeafNode
prefer_left,
*replacement_left,
*replacement_right);
if constexpr (has_children) {
this->balance_child_ptrs(
*this,
right,
prefer_left,
*replacement_left,
*replacement_right);
}
replacement_left->pin.set_range(replacement_left->get_meta());
replacement_right->pin.set_range(replacement_right->get_meta());
@ -1090,15 +1156,10 @@ struct FixedKVLeafNode
this->resolve_relative_addrs(base);
}
std::ostream &print_detail(std::ostream &out) const
std::ostream &_print_detail(std::ostream &out) const
{
out << ", size=" << this->get_size()
<< ", meta=" << this->get_meta()
<< ", parent_tracker=" << (void*)this->parent_tracker.get();
if (this->parent_tracker) {
out << ", parent=" << (void*)this->parent_tracker->parent.get();
}
return out;
return out << ", size=" << this->get_size()
<< ", meta=" << this->get_meta();
}
constexpr static size_t get_min_capacity() {

View File

@ -6,6 +6,8 @@
#include "crimson/common/log.h"
#include "crimson/os/seastore/btree/fixed_kv_node.h"
namespace {
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_seastore_tm);
@ -91,7 +93,22 @@ CachedExtent* CachedExtent::get_transactional_view(transaction_id_t tid) {
}
}
std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const
// Prints the address of the tracker followed by the address of the parent
// extent it refers to. get_parent() asserts that the parent is set.
std::ostream &operator<<(std::ostream &out, const parent_tracker_t &tracker) {
  out << "parent_tracker=" << static_cast<const void*>(&tracker);
  out << ", parent="
      << static_cast<const void*>(tracker.get_parent().get());
  return out;
}
/**
 * print_detail
 *
 * Appends the parent tracker information of this extent to `out`, then lets
 * the derived class append its own fields through _print_detail().
 *
 * Every detail field is written as ", key=value", so the tracker field is
 * prefixed with the ", " separator whether or not a tracker is present.
 */
std::ostream &ChildableCachedExtent::print_detail(std::ostream &out) const {
  out << ", ";
  if (parent_tracker) {
    // prints "parent_tracker=<addr>, parent=<addr>"
    out << *parent_tracker;
  } else {
    out << "parent_tracker=" << (void*)nullptr;
  }
  _print_detail(out);
  return out;
}
std::ostream &LogicalCachedExtent::_print_detail(std::ostream &out) const
{
out << ", laddr=" << laddr;
if (pin) {
@ -110,6 +127,36 @@ void CachedExtent::set_invalid(Transaction &t) {
on_invalidated(t);
}
/**
 * When a valid, non-pending extent that has a parent tracker is destroyed,
 * clear the child pointer the parent node holds for it.
 */
LogicalCachedExtent::~LogicalCachedExtent() {
  const bool linked_to_parent =
    has_parent_tracker() && is_valid() && !is_pending();
  if (!linked_to_parent) {
    return;
  }
  assert(get_parent_node());
  auto leaf = get_parent_node<FixedKVNode<laddr_t>>();
  // locate the slot of this extent by its laddr
  auto slot = leaf->lower_bound_offset(laddr);
  assert(leaf->get_key_from_idx(slot) == laddr);
  assert(leaf->children[slot] == this);
  leaf->children[slot] = nullptr;
}
void LogicalCachedExtent::on_replace_prior(Transaction &t) {
assert(is_mutation_pending());
take_prior_parent_tracker();
assert(get_parent_node());
auto parent = get_parent_node<FixedKVNode<laddr_t>>();
//TODO: can this search be avoided?
auto off = parent->lower_bound_offset(laddr);
assert(parent->get_key_from_idx(off) == laddr);
parent->children[off] = this;
}
/**
 * If the parent still records this tracker as its own (my_tracker), clear
 * that raw pointer so it does not dangle.
 *
 * NOTE(review): the parent is always viewed as FixedKVNode<laddr_t> here;
 * confirm this is safe for parents keyed by another type.
 */
parent_tracker_t::~parent_tracker_t() {
  auto &owner = static_cast<FixedKVNode<laddr_t>&>(*parent);
  if (owner.my_tracker != this) {
    return;
  }
  // this is parent's tracker, reset it
  owner.my_tracker = nullptr;
}
std::ostream &operator<<(std::ostream &out, const LBAPin &rhs)
{
return out << "LBAPin(" << rhs.get_key() << "~" << rhs.get_length()

View File

@ -33,6 +33,8 @@ template <
size_t node_size,
bool leaf_has_children>
class FixedKVBtree;
template <typename, typename>
class BtreeNodePin;
// #define DEBUG_CACHED_EXTENT_REF
#ifdef DEBUG_CACHED_EXTENT_REF
@ -543,6 +545,8 @@ public:
void set_invalid(Transaction &t);
// a rewrite extent has an invalid prior_instance,
// and a mutation_pending extent has a valid prior_instance
CachedExtentRef get_prior_instance() {
return prior_instance;
}
@ -715,6 +719,8 @@ protected:
friend class crimson::os::seastore::SegmentedAllocator;
friend class crimson::os::seastore::TransactionManager;
friend class crimson::os::seastore::ExtentPlacementManager;
template <typename, typename>
friend class BtreeNodePin;
};
std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t);
@ -885,6 +891,7 @@ public:
virtual key_t get_key() const = 0;
virtual PhysicalNodePinRef<key_t, val_t> duplicate() const = 0;
virtual bool has_been_invalidated() const = 0;
virtual CachedExtentRef get_parent() const = 0;
virtual ~PhysicalNodePin() {}
};
@ -957,6 +964,67 @@ public:
}
};
/**
 * parent_tracker_t
 *
 * Reference counted (non-atomic counter) object holding a reference to a
 * parent extent.
 */
class parent_tracker_t
  : public boost::intrusive_ref_counter<
      parent_tracker_t, boost::thread_unsafe_counter> {
public:
  parent_tracker_t(CachedExtentRef parent)
    : parent(parent) {}
  parent_tracker_t(CachedExtent* parent)
    : parent(parent) {}
  ~parent_tracker_t();

  // Returns the parent, cast to T unless T is CachedExtent itself.
  // Asserts that a parent is set.
  template <typename T = CachedExtent>
  TCachedExtentRef<T> get_parent() const {
    ceph_assert(parent);
    if constexpr (!std::is_same_v<T, CachedExtent>) {
      return parent->template cast<T>();
    } else {
      return parent;
    }
  }

  // Replaces the tracked parent with p.
  void reset_parent(CachedExtentRef p) {
    parent = p;
  }

  // True when a parent is set and that parent reports itself valid.
  bool is_valid() const {
    if (!parent) {
      return false;
    }
    return parent->is_valid();
  }
private:
  CachedExtentRef parent;
};
std::ostream &operator<<(std::ostream &, const parent_tracker_t &);
using parent_tracker_ref = boost::intrusive_ptr<parent_tracker_t>;
/**
 * ChildableCachedExtent
 *
 * CachedExtent that can hold a reference to a parent_tracker_t, through
 * which its parent extent can be reached.
 */
class ChildableCachedExtent : public CachedExtent {
public:
  template <typename... T>
  ChildableCachedExtent(T&&... t) : CachedExtent(std::forward<T>(t)...) {}

  // True when a parent tracker is currently attached.
  bool has_parent_tracker() const {
    return static_cast<bool>(parent_tracker);
  }

  // Attaches tracker p; with no argument the current tracker is dropped.
  void reset_parent_tracker(parent_tracker_t *p = nullptr) {
    parent_tracker.reset(p);
  }

  // True when a tracker is attached and the tracker reports itself valid.
  bool is_parent_valid() const {
    if (!parent_tracker) {
      return false;
    }
    return parent_tracker->is_valid();
  }

  // Returns the parent recorded in the tracker, cast to T.
  // A tracker must be attached.
  template <typename T = CachedExtent>
  TCachedExtentRef<T> get_parent_node() const {
    assert(parent_tracker);
    return parent_tracker->template get_parent<T>();
  }

  // Shares the parent tracker of the prior instance with this extent.
  void take_prior_parent_tracker() {
    parent_tracker =
      static_cast<ChildableCachedExtent&>(*get_prior_instance()).parent_tracker;
  }

  std::ostream &print_detail(std::ostream &out) const final;
private:
  parent_tracker_ref parent_tracker;

  // Hook for derived classes to append their own fields to print_detail();
  // prints nothing by default.
  virtual std::ostream &_print_detail(std::ostream &out) const {
    return out;
  }
};
/**
* LogicalCachedExtent
*
@ -965,10 +1033,12 @@ public:
* Users of TransactionManager should be using extents derived from
* LogicalCachedExtent.
*/
class LogicalCachedExtent : public CachedExtent {
class LogicalCachedExtent : public ChildableCachedExtent {
public:
template <typename... T>
LogicalCachedExtent(T&&... t) : CachedExtent(std::forward<T>(t)...) {}
LogicalCachedExtent(T&&... t)
: ChildableCachedExtent(std::forward<T>(t)...)
{}
void set_pin(LBAPinRef &&npin) {
assert(!pin);
@ -1005,8 +1075,13 @@ public:
return true;
}
std::ostream &print_detail(std::ostream &out) const final;
std::ostream &_print_detail(std::ostream &out) const final;
void on_replace_prior(Transaction &t) final;
virtual ~LogicalCachedExtent();
protected:
virtual void apply_delta(const ceph::bufferlist &bl) = 0;
virtual std::ostream &print_detail_l(std::ostream &out) const {
return out;
@ -1026,6 +1101,16 @@ protected:
private:
laddr_t laddr = L_ADDR_NULL;
LBAPinRef pin;
template <
typename node_key_t,
typename node_val_t,
typename internal_node_t,
typename leaf_node_t,
typename pin_t,
size_t node_size,
bool leaf_has_children>
friend class FixedKVBtree;
};
using LogicalCachedExtentRef = TCachedExtentRef<LogicalCachedExtent>;

View File

@ -17,17 +17,15 @@ LBAManager::update_mappings(
t,
extent->get_laddr(),
extent->get_prior_paddr_and_reset(),
extent->get_paddr()
extent->get_paddr(),
nullptr // all the extents should have already been
// added to the fixed_kv_btree
);
});
}
template <bool leaf_has_children>
LBAManagerRef lba_manager::create_lba_manager(Cache &cache) {
return LBAManagerRef(new btree::BtreeLBAManager<leaf_has_children>(cache));
return LBAManagerRef(new btree::BtreeLBAManager(cache));
}
template LBAManagerRef lba_manager::create_lba_manager<true>(Cache &cache);
template LBAManagerRef lba_manager::create_lba_manager<false>(Cache &cache);
}

View File

@ -80,7 +80,8 @@ public:
Transaction &t,
laddr_t hint,
extent_len_t len,
paddr_t addr) = 0;
paddr_t addr,
LogicalCachedExtent *nextent) = 0;
struct ref_update_result_t {
unsigned refcount = 0;
@ -166,7 +167,8 @@ public:
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
paddr_t paddr) = 0;
paddr_t paddr,
LogicalCachedExtent *nextent) = 0;
/**
* update_mappings
@ -206,7 +208,6 @@ using LBAManagerRef = std::unique_ptr<LBAManager>;
class Cache;
namespace lba_manager {
template <bool leaf_has_children>
LBAManagerRef create_lba_manager(Cache &cache);
}

View File

@ -209,7 +209,8 @@ BtreeLBAManager::alloc_extent(
Transaction &t,
laddr_t hint,
extent_len_t len,
paddr_t addr)
paddr_t addr,
LogicalCachedExtent* nextent)
{
struct state_t {
laddr_t last_end;
@ -229,7 +230,8 @@ BtreeLBAManager::alloc_extent(
cache,
c,
hint,
[this, FNAME, c, hint, len, addr, lookup_attempts, &t](auto &btree, auto &state) {
[this, FNAME, c, hint, len, addr, lookup_attempts,
&t, nextent](auto &btree, auto &state) {
return LBABtree::iterate_repeat(
c,
btree.upper_bound_right(c, hint),
@ -265,12 +267,13 @@ BtreeLBAManager::alloc_extent(
interruptible::ready_future_marker{},
seastar::stop_iteration::no);
}
}).si_then([FNAME, c, addr, len, hint, &btree, &state] {
}).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] {
return btree.insert(
c,
*state.insert_iter,
state.last_end,
lba_map_val_t{len, addr, 1, 0}
lba_map_val_t{len, addr, 1, 0},
nextent
).si_then([&state, FNAME, c, addr, len, hint](auto &&p) {
auto [iter, inserted] = std::move(p);
TRACET("{}~{}, hint={}, inserted at {}",
@ -473,7 +476,8 @@ BtreeLBAManager::update_mapping(
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
paddr_t addr)
paddr_t addr,
LogicalCachedExtent *nextent)
{
LOG_PREFIX(BtreeLBAManager::update_mapping);
TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr);
@ -487,7 +491,8 @@ BtreeLBAManager::update_mapping(
ceph_assert(in.paddr == prev_addr);
ret.paddr = addr;
return ret;
}
},
nextent
).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
DEBUGT("laddr={}, paddr {} => {} done -- {}",
t, laddr, prev_addr, addr, result);
@ -566,7 +571,8 @@ BtreeLBAManager::update_refcount(
ceph_assert((int)out.refcount + delta >= 0);
out.refcount += delta;
return out;
}
},
nullptr
).si_then([&t, addr, delta, FNAME](auto result) {
DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
return ref_update_result_t{
@ -581,16 +587,17 @@ BtreeLBAManager::_update_mapping_ret
BtreeLBAManager::_update_mapping(
Transaction &t,
laddr_t addr,
update_func_t &&f)
update_func_t &&f,
LogicalCachedExtent* nextent)
{
auto c = get_context(t);
return with_btree_ret<LBABtree, lba_map_val_t>(
cache,
c,
[f=std::move(f), c, addr](auto &btree) mutable {
[f=std::move(f), c, addr, nextent](auto &btree) mutable {
return btree.lower_bound(
c, addr
).si_then([&btree, f=std::move(f), c, addr](auto iter)
).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
-> _update_mapping_ret {
if (iter.is_end() || iter.get_key() != addr) {
LOG_PREFIX(BtreeLBAManager::_update_mapping);
@ -610,7 +617,8 @@ BtreeLBAManager::_update_mapping(
return btree.update(
c,
iter,
ret
ret,
nextent
).si_then([ret](auto) {
return ret;
});

View File

@ -30,10 +30,12 @@ public:
BtreeLBAPin() = default;
BtreeLBAPin(
CachedExtentRef parent,
uint16_t pos,
lba_map_val_t &val,
lba_node_meta_t &&meta)
: BtreeNodePin(
parent,
pos,
val.paddr,
val.len,
std::forward<lba_node_meta_t>(meta))
@ -88,7 +90,8 @@ public:
Transaction &t,
laddr_t hint,
extent_len_t len,
paddr_t addr) final;
paddr_t addr,
LogicalCachedExtent*) final;
ref_ret decref_extent(
Transaction &t,
@ -133,7 +136,8 @@ public:
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
paddr_t paddr) final;
paddr_t paddr,
LogicalCachedExtent*) final;
get_physical_extent_if_live_ret get_physical_extent_if_live(
Transaction &t,
@ -198,7 +202,8 @@ private:
_update_mapping_ret _update_mapping(
Transaction &t,
laddr_t addr,
update_func_t &&f);
update_func_t &&f,
LogicalCachedExtent*);
};
using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;

View File

@ -27,15 +27,15 @@ std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v)
<< ")";
}
std::ostream &LBALeafNode::print_detail(std::ostream &out) const
std::ostream &LBALeafNode::_print_detail(std::ostream &out) const
{
out << ", size=" << get_size()
<< ", meta=" << get_meta()
<< ", parent_tracker=" << (void*)parent_tracker.get();
if (parent_tracker) {
return out << ", parent=" << (void*)parent_tracker->parent.get();
out << ", size=" << this->get_size()
<< ", meta=" << this->get_meta()
<< ", my_tracker=" << (void*)this->my_tracker;
if (this->my_tracker) {
out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
}
return out << ", root_block=" << (void*)root_block.get();
return out << ", root_block=" << (void*)this->root_block.get();
}
void LBALeafNode::resolve_relative_addrs(paddr_t base)

View File

@ -145,64 +145,125 @@ struct LBALeafNode
LBALeafNode,
true> {
using Ref = TCachedExtentRef<LBALeafNode>;
using internal_iterator_t = const_iterator;
using parent_type_t = FixedKVLeafNode<
LEAF_NODE_CAPACITY,
laddr_t, laddr_le_t,
lba_map_val_t, lba_map_val_le_t,
LBA_BLOCK_SIZE,
LBALeafNode,
true>;
using internal_const_iterator_t =
typename parent_type_t::node_layout_t::const_iterator;
using internal_iterator_t =
typename parent_type_t::node_layout_t::iterator;
template <typename... T>
LBALeafNode(T&&... t) :
FixedKVLeafNode(std::forward<T>(t)...) {}
parent_type_t(std::forward<T>(t)...) {}
static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
void update(
const_iterator iter,
lba_map_val_t val) final {
val.paddr = maybe_generate_relative(val.paddr);
return journal_update(
iter,
val,
maybe_get_delta_buffer());
bool validate_stable_children() final {
LOG_PREFIX(LBALeafNode::validate_stable_children);
if (this->children.empty()) {
return false;
}
for (auto i : *this) {
auto child = (LogicalCachedExtent*)this->children[i.get_offset()];
if (is_valid_child_ptr(child) && child->get_laddr() != i.get_key()) {
SUBERROR(seastore_fixedkv_tree,
"stable child not valid: child {}, key {}",
*child,
i.get_key());
ceph_abort();
return false;
}
}
return true;
}
const_iterator insert(
const_iterator iter,
/**
 * update
 *
 * Replaces the value at `iter` with `val` and journals the change.
 *
 * @param iter    position of the mapping to update
 * @param val     new value; its paddr is passed through
 *                maybe_generate_relative() before being stored
 * @param nextent if not null, the child pointer at `iter` is updated to it
 */
void update(
  internal_const_iterator_t iter,
  lba_map_val_t val,
  LogicalCachedExtent* nextent) final {
  LOG_PREFIX(LBALeafNode::update);
  if (nextent != nullptr) {
    SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}",
      this->pending_for_transaction,
      iter.get_offset(),
      *nextent);
    // child-ptr may already be correct, see LBAManager::update_mappings()
    this->update_child_ptr(iter, nextent);
  }
  val.paddr = this->maybe_generate_relative(val.paddr);
  this->journal_update(
    iter,
    val,
    this->maybe_get_delta_buffer());
}
internal_const_iterator_t insert(
internal_const_iterator_t iter,
laddr_t addr,
lba_map_val_t val) final {
val.paddr = maybe_generate_relative(val.paddr);
journal_insert(
lba_map_val_t val,
LogicalCachedExtent* nextent) final {
LOG_PREFIX(LBALeafNode::insert);
SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, extent {}",
this->pending_for_transaction,
iter.get_offset(),
addr,
(void*)nextent);
this->insert_child_ptr(iter, nextent);
val.paddr = this->maybe_generate_relative(val.paddr);
this->journal_insert(
iter,
addr,
val,
maybe_get_delta_buffer());
this->maybe_get_delta_buffer());
return iter;
}
void remove(const_iterator iter) final {
return journal_remove(
void remove(internal_const_iterator_t iter) final {
LOG_PREFIX(LBALeafNode::remove);
SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
this->pending_for_transaction,
iter.get_offset(),
iter.get_key());
assert(iter != this->end());
this->remove_child_ptr(iter);
return this->journal_remove(
iter,
maybe_get_delta_buffer());
this->maybe_get_delta_buffer());
}
// See LBAInternalNode, same concept
void resolve_relative_addrs(paddr_t base);
void node_resolve_vals(iterator from, iterator to) const final {
if (is_initial_pending()) {
void node_resolve_vals(
internal_iterator_t from,
internal_iterator_t to) const final
{
if (this->is_initial_pending()) {
for (auto i = from; i != to; ++i) {
auto val = i->get_val();
if (val.paddr.is_relative()) {
assert(val.paddr.is_block_relative());
val.paddr = get_paddr().add_relative(val.paddr);
val.paddr = this->get_paddr().add_relative(val.paddr);
i->set_val(val);
}
}
}
}
void node_unresolve_vals(iterator from, iterator to) const final {
if (is_initial_pending()) {
void node_unresolve_vals(
internal_iterator_t from,
internal_iterator_t to) const final
{
if (this->is_initial_pending()) {
for (auto i = from; i != to; ++i) {
auto val = i->get_val();
if (val.paddr.is_relative()) {
auto val = i->get_val();
assert(val.paddr.is_record_relative());
val.paddr = val.paddr.block_relative_to(get_paddr());
val.paddr = val.paddr.block_relative_to(this->get_paddr());
i->set_val(val);
}
}
@ -213,7 +274,7 @@ struct LBALeafNode
return TYPE;
}
std::ostream &print_detail(std::ostream &out) const final;
std::ostream &_print_detail(std::ostream &out) const final;
};
using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;

View File

@ -219,6 +219,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "LADDR_INTERNAL";
case extent_types_t::LADDR_LEAF:
return out << "LADDR_LEAF";
case extent_types_t::DINK_LADDR_LEAF:
return out << "LADDR_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
case extent_types_t::OMAP_INNER:

View File

@ -1062,7 +1062,7 @@ enum class extent_types_t : uint8_t {
ROOT = 0,
LADDR_INTERNAL = 1,
LADDR_LEAF = 2,
DINK_LADDR_LEAF = 3,
DINK_LADDR_LEAF = 3, // should only be used for unit tests
OMAP_INNER = 4,
OMAP_LEAF = 5,
ONODE_BLOCK_STAGED = 6,

View File

@ -486,7 +486,8 @@ TransactionManager::rewrite_logical_extent(
t,
lextent->get_laddr(),
lextent->get_paddr(),
nlextent->get_paddr());
nlextent->get_paddr(),
nlextent.get());
}
TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(

View File

@ -142,6 +142,7 @@ public:
assert(!extent.has_pin());
assert(!extent.has_been_invalidated());
assert(!pin->has_been_invalidated());
assert(pin->get_parent());
extent.set_pin(std::move(pin));
lba_manager->add_pin(extent.get_pin());
}
@ -325,7 +326,8 @@ public:
t,
laddr_hint,
len,
ext->get_paddr()
ext->get_paddr(),
ext.get()
).si_then([ext=std::move(ext), laddr_hint, &t, FNAME](auto &&ref) mutable {
ext->set_pin(std::move(ref));
SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint);
@ -380,7 +382,8 @@ public:
t,
laddr_hint,
length,
existing_paddr
existing_paddr,
ext.get()
).si_then([ext=std::move(ext), laddr_hint, this](auto &&ref) {
ceph_assert(laddr_hint == ref->get_key());
ext->set_pin(std::move(ref));
@ -409,7 +412,8 @@ public:
t,
hint,
len,
P_ADDR_ZERO);
P_ADDR_ZERO,
nullptr);
}
/* alloc_extents

View File

@ -257,7 +257,7 @@ struct lba_btree_test : btree_test_base {
check.emplace(addr, get_map_val(len));
lba_btree_update([=, this](auto &btree, auto &t) {
return btree.insert(
get_op_context(t), addr, get_map_val(len)
get_op_context(t), addr, get_map_val(len), nullptr
).si_then([](auto){});
});
}
@ -324,7 +324,7 @@ TEST_F(lba_btree_test, basic)
}
struct btree_lba_manager_test : btree_test_base {
BtreeLBAManagerRef<false> lba_manager;
BtreeLBAManagerRef lba_manager;
btree_lba_manager_test() = default;
@ -426,7 +426,7 @@ struct btree_lba_manager_test : btree_test_base {
auto ret = with_trans_intr(
*t.t,
[=, this](auto &t) {
return lba_manager->alloc_extent(t, hint, len, paddr);
return lba_manager->alloc_extent(t, hint, len, paddr, nullptr);
}).unsafe_get0();
logger().debug("alloc'd: {}", *ret);
EXPECT_EQ(len, ret->get_length());