Mirror of https://github.com/ceph/ceph, synced 2025-02-22 10:37:15 +00:00

Merge pull request #44912 from xxhdx1985126/wip-fixed-kv-btree

crimson/os/seastore: extract fixed kv btree implementation out of lba manager

Reviewed-by: Samuel Just <sjust@redhat.com>

commit 95e7ce9305
@@ -11,8 +11,6 @@ set(crimson_seastore_srcs
   lba_manager.cc
   segment_cleaner.cc
   lba_manager/btree/btree_lba_manager.cc
-  lba_manager/btree/btree_range_pin.cc
-  lba_manager/btree/lba_btree.cc
   lba_manager/btree/lba_btree_node.cc
   omap_manager.cc
   omap_manager/btree/btree_omap_manager.cc
475 src/crimson/os/seastore/btree/btree_range_pin.h Normal file

@@ -0,0 +1,475 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <boost/intrusive/set.hpp>

#include "crimson/common/log.h"

#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/seastore_types.h"

namespace crimson::os::seastore {

template <typename bound_t>
struct fixed_kv_node_meta_t {
  bound_t begin = 0;
  bound_t end = 0;
  depth_t depth = 0;

  bool is_parent_of(const fixed_kv_node_meta_t &other) const {
    return (depth == other.depth + 1) &&
      (begin <= other.begin) &&
      (end > other.begin);
  }

  std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t> split_into(bound_t pivot) const {
    return std::make_pair(
      fixed_kv_node_meta_t{begin, pivot, depth},
      fixed_kv_node_meta_t{pivot, end, depth});
  }

  static fixed_kv_node_meta_t merge_from(
    const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs) {
    ceph_assert(lhs.depth == rhs.depth);
    return fixed_kv_node_meta_t{lhs.begin, rhs.end, lhs.depth};
  }

  static std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t>
  rebalance(const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs, bound_t pivot) {
    ceph_assert(lhs.depth == rhs.depth);
    return std::make_pair(
      fixed_kv_node_meta_t{lhs.begin, pivot, lhs.depth},
      fixed_kv_node_meta_t{pivot, rhs.end, lhs.depth});
  }

  bool is_root() const {
    return begin == 0 && end == L_ADDR_MAX;
  }
};

template <typename bound_t>
inline std::ostream &operator<<(
  std::ostream &lhs,
  const fixed_kv_node_meta_t<bound_t> &rhs)
{
  return lhs << "btree_node_meta_t("
             << "begin=" << rhs.begin
             << ", end=" << rhs.end
             << ", depth=" << rhs.depth
             << ")";
}

/**
 * fixed_kv_node_meta_le_t
 *
 * On disk layout for fixed_kv_node_meta_t
 */
template <typename bound_le_t>
struct fixed_kv_node_meta_le_t {
  bound_le_t begin = bound_le_t(0);
  bound_le_t end = bound_le_t(0);
  depth_le_t depth = init_depth_le(0);

  fixed_kv_node_meta_le_t() = default;
  fixed_kv_node_meta_le_t(
    const fixed_kv_node_meta_le_t<bound_le_t> &) = default;
  explicit fixed_kv_node_meta_le_t(
    const fixed_kv_node_meta_t<typename bound_le_t::orig_type> &val)
    : begin(ceph_le64(val.begin)),
      end(ceph_le64(val.end)),
      depth(init_depth_le(val.depth)) {}

  operator fixed_kv_node_meta_t<typename bound_le_t::orig_type>() const {
    return fixed_kv_node_meta_t<typename bound_le_t::orig_type>{
      begin, end, depth };
  }
};


/**
 * btree_range_pin_t
 *
 * Element tracked by btree_pin_set_t below.  Encapsulates the intrusive_set
 * hook, the fixed_kv_node_meta_t representing the key range covered by a node,
 * and extent and ref members intended to hold a reference when the extent
 * should be pinned.
 */
template <typename T>
class btree_pin_set_t;

template <typename node_bound_t>
class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
  friend class btree_pin_set_t<node_bound_t>;
  fixed_kv_node_meta_t<node_bound_t> range;

  btree_pin_set_t<node_bound_t> *pins = nullptr;

  // We need to be able to remember extent without holding a reference,
  // but we can do it more compactly -- TODO
  CachedExtent *extent = nullptr;
  CachedExtentRef ref;

  using index_t = boost::intrusive::set<btree_range_pin_t>;

  static auto get_tuple(const fixed_kv_node_meta_t<node_bound_t> &meta) {
    return std::make_tuple(-meta.depth, meta.begin);
  }

  void acquire_ref() {
    ref = CachedExtentRef(extent);
  }

  void drop_ref() {
    ref.reset();
  }

public:
  btree_range_pin_t() = default;
  btree_range_pin_t(CachedExtent *extent)
    : extent(extent) {}
  btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
    : range(rhs.range), extent(extent) {}

  bool has_ref() const {
    return !!ref;
  }

  bool is_root() const {
    return range.is_root();
  }

  void set_range(const fixed_kv_node_meta_t<node_bound_t> &nrange) {
    range = nrange;
  }
  void set_extent(CachedExtent *nextent) {
    ceph_assert(!extent);
    extent = nextent;
  }

  CachedExtent &get_extent() {
    assert(extent);
    return *extent;
  }

  bool has_ref() {
    return !!ref;
  }

  void take_pin(btree_range_pin_t &other)
  {
    ceph_assert(other.extent);
    if (other.pins) {
      other.pins->replace_pin(*this, other);
      pins = other.pins;
      other.pins = nullptr;

      if (other.has_ref()) {
        other.drop_ref();
        acquire_ref();
      }
    }
  }

  friend bool operator<(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) < get_tuple(rhs.range);
  }
  friend bool operator>(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) > get_tuple(rhs.range);
  }
  friend bool operator==(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) == rhs.get_tuple(rhs.range);
  }

  struct meta_cmp_t {
    bool operator()(
      const btree_range_pin_t &lhs, const fixed_kv_node_meta_t<node_bound_t> &rhs) const {
      return get_tuple(lhs.range) < get_tuple(rhs);
    }
    bool operator()(
      const fixed_kv_node_meta_t<node_bound_t> &lhs, const btree_range_pin_t &rhs) const {
      return get_tuple(lhs) < get_tuple(rhs.range);
    }
  };

  friend std::ostream &operator<<(
    std::ostream &lhs,
    const btree_range_pin_t<node_bound_t> &rhs) {
    return lhs << "btree_range_pin_t("
               << "begin=" << rhs.range.begin
               << ", end=" << rhs.range.end
               << ", depth=" << rhs.range.depth
               << ", extent=" << rhs.extent
               << ")";
  }

  template <typename>
  friend class BtreeNodePin;
  ~btree_range_pin_t()
  {
    ceph_assert(!pins == !is_linked());
    ceph_assert(!ref);
    if (pins) {
      crimson::get_logger(ceph_subsys_seastore_lba
      ).debug("{}: removing {}", __func__, *this);
      pins->remove_pin(*this, true);
    }
    extent = nullptr;
  }

};

/**
 * btree_pin_set_t
 *
 * Ensures that for every cached node, all parent btree nodes required
 * to map it are present in cache.  Relocating these nodes can
 * therefore be done without further reads or cache space.
 *
 * Contains a btree_range_pin_t for every clean or dirty btree node
 * or LogicalCachedExtent instance in cache at any point in time.
 * For any btree node, the contained btree_range_pin_t will hold
 * a reference to that node pinning it in cache as long as that
 * node has children in the set.  This invariant can be violated
 * only by calling retire_extent and is repaired by calling
 * check_parent synchronously after adding any new extents.
 */
template <typename node_bound_t>
class btree_pin_set_t {
  friend class btree_range_pin_t<node_bound_t>;
  using pins_t = typename btree_range_pin_t<node_bound_t>::index_t;
  pins_t pins;

  /// Removes pin from set optionally checking whether parent has other children
  void remove_pin(btree_range_pin_t<node_bound_t> &pin, bool do_check_parent)
  {
    crimson::get_logger(ceph_subsys_seastore_lba).debug("{}: {}", __func__, pin);
    ceph_assert(pin.is_linked());
    ceph_assert(pin.pins);
    ceph_assert(!pin.ref);

    pins.erase(pin);
    pin.pins = nullptr;

    if (do_check_parent) {
      check_parent(pin);
    }
  }

  void replace_pin(
    btree_range_pin_t<node_bound_t> &to,
    btree_range_pin_t<node_bound_t> &from)
  {
    pins.replace_node(pins.iterator_to(from), to);
  }

  /// Returns parent pin if exists
  btree_range_pin_t<node_bound_t> *maybe_get_parent(
    const fixed_kv_node_meta_t<node_bound_t> &meta)
  {
    auto cmeta = meta;
    cmeta.depth++;
    auto iter = pins.upper_bound(
      cmeta,
      typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
    if (iter == pins.begin()) {
      return nullptr;
    } else {
      --iter;
      if (iter->range.is_parent_of(meta)) {
        return &*iter;
      } else {
        return nullptr;
      }
    }
  }

  /// Returns earliest child pin if exist
  const btree_range_pin_t<node_bound_t>
  *maybe_get_first_child(const fixed_kv_node_meta_t<node_bound_t> &meta) const
  {
    if (meta.depth == 0) {
      return nullptr;
    }

    auto cmeta = meta;
    cmeta.depth--;

    auto iter = pins.lower_bound(
      cmeta,
      typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
    if (iter == pins.end()) {
      return nullptr;
    } else if (meta.is_parent_of(iter->range)) {
      return &*iter;
    } else {
      return nullptr;
    }
  }

  /// Releases pin if it has no children
  void release_if_no_children(btree_range_pin_t<node_bound_t> &pin)
  {
    ceph_assert(pin.is_linked());
    if (maybe_get_first_child(pin.range) == nullptr) {
      pin.drop_ref();
    }
  }

public:
  /// Adds pin to set, assumes set is consistent
  void add_pin(btree_range_pin_t<node_bound_t> &pin)
  {
    ceph_assert(!pin.is_linked());
    ceph_assert(!pin.pins);
    ceph_assert(!pin.ref);

    auto [prev, inserted] = pins.insert(pin);
    if (!inserted) {
      crimson::get_logger(ceph_subsys_seastore_lba).error(
        "{}: unable to add {} ({}), found {} ({})",
        __func__,
        pin,
        *(pin.extent),
        *prev,
        *(prev->extent));
      ceph_assert(0 == "impossible");
      return;
    }
    pin.pins = this;
    if (!pin.is_root()) {
      auto *parent = maybe_get_parent(pin.range);
      ceph_assert(parent);
      if (!parent->has_ref()) {
        crimson::get_logger(ceph_subsys_seastore_lba
        ).debug("{}: acquiring parent {}", __func__,
          static_cast<void*>(parent));
        parent->acquire_ref();
      } else {
        crimson::get_logger(ceph_subsys_seastore_lba).debug(
          "{}: parent has ref {}", __func__,
          static_cast<void*>(parent));
      }
    }
    if (maybe_get_first_child(pin.range) != nullptr) {
      crimson::get_logger(ceph_subsys_seastore_lba).debug(
        "{}: acquiring self {}", __func__, pin);
      pin.acquire_ref();
    }
  }


  /**
   * retire/check_parent
   *
   * See BtreeLBAManager::complete_transaction.
   * retire removes the specified pin from the set, but does not
   * check parents.  After any new extents are added to the set,
   * the caller is required to call check_parent to restore the
   * invariant.
   */
  void retire(btree_range_pin_t<node_bound_t> &pin)
  {
    pin.drop_ref();
    remove_pin(pin, false);
  }

  void check_parent(btree_range_pin_t<node_bound_t> &pin)
  {
    auto parent = maybe_get_parent(pin.range);
    if (parent) {
      crimson::get_logger(ceph_subsys_seastore_lba
      ).debug("{}: releasing parent {}", __func__, *parent);
      release_if_no_children(*parent);
    }
  }

  template <typename F>
  void scan(F &&f) {
    for (auto &i : pins) {
      std::invoke(f, i);
    }
  }

  ~btree_pin_set_t() {
    ceph_assert(pins.empty());
  }
};

template <typename key_t>
class BtreeNodePin : public PhysicalNodePin<key_t> {

  /**
   * parent
   *
   * populated until link_extent is called to ensure cache residence
   * until add_pin is called.
   */
  CachedExtentRef parent;

  paddr_t paddr;
  btree_range_pin_t<key_t> pin;

public:
  BtreeNodePin() = default;

  BtreeNodePin(
    CachedExtentRef parent,
    paddr_t paddr,
    fixed_kv_node_meta_t<key_t> &&meta)
    : parent(parent), paddr(paddr) {
    pin.set_range(std::move(meta));
  }

  btree_range_pin_t<key_t>& get_range_pin() {
    return pin;
  }

  CachedExtentRef get_parent() {
    return parent;
  }

  void set_parent(CachedExtentRef pin) {
    parent = pin;
  }

  void link_extent(LogicalCachedExtent *ref) final {
    pin.set_extent(ref);
  }

  extent_len_t get_length() const final {
    ceph_assert(pin.range.end > pin.range.begin);
    return pin.range.end - pin.range.begin;
  }

  paddr_t get_paddr() const final {
    return paddr;
  }

  key_t get_key() const final {
    return pin.range.begin;
  }

  PhysicalNodePinRef<key_t> duplicate() const final {
    auto ret = std::unique_ptr<BtreeNodePin<key_t>>(
      new BtreeNodePin<key_t>);
    ret->pin.set_range(pin.range);
    ret->paddr = paddr;
    ret->parent = parent;
    return ret;
  }

  void take_pin(PhysicalNodePin<key_t> &opin) final {
    pin.take_pin(static_cast<BtreeNodePin<key_t>&>(opin).pin);
  }

  bool has_been_invalidated() const final {
    return parent->has_been_invalidated();
  }
};

}
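The `fixed_kv_node_meta_t` helpers above (`split_into`, `merge_from`, `rebalance`) and the `(-depth, begin)` ordering behind `btree_pin_set_t` carry the pin bookkeeping. Below is a minimal, self-contained sketch of those semantics; the integer `bound_t`/`depth_t` aliases are hypothetical stand-ins for the crimson types, not the real definitions:

```cpp
#include <cassert>
#include <cstdint>
#include <tuple>
#include <utility>

using bound_t = uint64_t;   // stand-in for the template bound type
using depth_t = uint32_t;   // stand-in for crimson's depth_t

struct meta_t {
  bound_t begin = 0, end = 0;
  depth_t depth = 0;

  // A parent sits exactly one level up and its range covers the child's start.
  bool is_parent_of(const meta_t &other) const {
    return depth == other.depth + 1 && begin <= other.begin && end > other.begin;
  }
  std::pair<meta_t, meta_t> split_into(bound_t pivot) const {
    return {{begin, pivot, depth}, {pivot, end, depth}};
  }
  static meta_t merge_from(const meta_t &l, const meta_t &r) {
    assert(l.depth == r.depth);
    return {l.begin, r.end, l.depth};
  }
};

// Pins sort by (-depth, begin): levels nearer the root order first, so the
// set's upper_bound/lower_bound walks can locate parents and first children.
static auto sort_key(const meta_t &m) {
  return std::make_tuple(-static_cast<int64_t>(m.depth), m.begin);
}

int main() {
  meta_t node{0, 100, 1};
  auto [left, right] = node.split_into(50);        // siblings at the same depth
  assert(left.end == 50 && right.begin == 50);
  meta_t merged = meta_t::merge_from(left, right); // undoes the split
  assert(merged.begin == 0 && merged.end == 100);
  meta_t parent{0, 100, 2};
  assert(parent.is_parent_of(node));
  assert(sort_key(parent) < sort_key(node));       // parent orders first
}
```

Note how `maybe_get_parent` and `maybe_get_first_child` above exploit exactly this ordering: bumping or dropping `depth` by one and probing with `upper_bound`/`lower_bound` lands adjacent to the candidate parent or first child.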
1794 src/crimson/os/seastore/btree/fixed_kv_btree.h Normal file

File diff suppressed because it is too large
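The suppressed fixed_kv_btree.h holds the extracted generic tree (`FixedKVBtree`), which this page does not show. As a rough orientation only, and not the actual FixedKVBtree API, a lookup in a fixed-kv btree descends from the root by picking, at each internal node, the last child whose lower bound does not exceed the key:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

// Hypothetical in-memory model: internal levels map a key lower bound to a
// child node; leaves map keys to values.  The real FixedKVBtree performs the
// same walk over cached extents, asynchronously, with fixed-size node layouts.
using key_t = uint64_t;
using val_t = uint64_t;

struct Node {
  bool leaf = false;
  std::vector<std::pair<key_t, size_t>> children; // key lower bound -> node id
  std::map<key_t, val_t> values;                  // leaf payload
};

std::optional<val_t> lookup(const std::vector<Node> &nodes, size_t root, key_t k) {
  size_t cur = root;
  while (!nodes[cur].leaf) {
    auto &cs = nodes[cur].children;
    // Descend into the last child whose lower bound is <= k.
    auto it = std::upper_bound(
      cs.begin(), cs.end(), k,
      [](key_t key, const auto &c) { return key < c.first; });
    if (it == cs.begin()) return std::nullopt;
    cur = std::prev(it)->second;
  }
  auto v = nodes[cur].values.find(k);
  return v == nodes[cur].values.end() ? std::nullopt : std::optional(v->second);
}

int main() {
  std::vector<Node> nodes(3);
  nodes[1].leaf = true; nodes[1].values = {{0, 10}, {5, 50}};
  nodes[2].leaf = true; nodes[2].values = {{100, 1000}};
  nodes[0].children = {{0, 1}, {100, 2}};  // root spans the whole key space
  return lookup(nodes, 0, 100).value() == 1000 ? 0 : 1;
}
```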
448 src/crimson/os/seastore/btree/fixed_kv_node.h Normal file

@@ -0,0 +1,448 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <sys/mman.h>
#include <memory>
#include <string.h>


#include "include/buffer.h"

#include "crimson/common/fixed_kv_node_layout.h"
#include "crimson/common/errorator.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/cached_extent.h"

#include "crimson/os/seastore/btree/btree_range_pin.h"
#include "crimson/os/seastore/btree/fixed_kv_btree.h"

namespace crimson::os::seastore {

/**
 * FixedKVNode
 *
 * Base class enabling recursive lookup between internal and leaf nodes.
 */
template <typename node_key_t>
struct FixedKVNode : CachedExtent {
  using FixedKVNodeRef = TCachedExtentRef<FixedKVNode>;

  btree_range_pin_t<node_key_t> pin;

  FixedKVNode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
  FixedKVNode(const FixedKVNode &rhs)
    : CachedExtent(rhs), pin(rhs.pin, this) {}

  virtual fixed_kv_node_meta_t<node_key_t> get_node_meta() const = 0;

  virtual ~FixedKVNode() = default;

  void on_delta_write(paddr_t record_block_offset) final {
    // All in-memory relative addrs are necessarily record-relative
    assert(get_prior_instance());
    pin.take_pin(get_prior_instance()->template cast<FixedKVNode>()->pin);
    resolve_relative_addrs(record_block_offset);
  }

  void on_initial_write() final {
    // All in-memory relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  void on_clean_read() final {
    // From initial write of block, relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  virtual void resolve_relative_addrs(paddr_t base) = 0;
};

/**
 * FixedKVInternalNode
 *
 * Abstracts operations on and layout of internal nodes for the
 * LBA Tree.
 */
template <
  size_t CAPACITY,
  typename NODE_KEY,
  typename NODE_KEY_LE,
  size_t node_size,
  typename node_type_t>
struct FixedKVInternalNode
  : FixedKVNode<NODE_KEY>,
    common::FixedKVNodeLayout<
      CAPACITY,
      fixed_kv_node_meta_t<NODE_KEY>,
      fixed_kv_node_meta_le_t<NODE_KEY_LE>,
      NODE_KEY, NODE_KEY_LE,
      paddr_t, paddr_le_t> {
  using Ref = TCachedExtentRef<node_type_t>;
  using node_layout_t =
    common::FixedKVNodeLayout<
      CAPACITY,
      fixed_kv_node_meta_t<NODE_KEY>,
      fixed_kv_node_meta_le_t<NODE_KEY_LE>,
      NODE_KEY,
      NODE_KEY_LE,
      paddr_t,
      paddr_le_t>;
  using internal_const_iterator_t = typename node_layout_t::const_iterator;
  using internal_iterator_t = typename node_layout_t::iterator;
  template <typename... T>
  FixedKVInternalNode(T&&... t) :
    FixedKVNode<NODE_KEY>(std::forward<T>(t)...),
    node_layout_t(this->get_bptr().c_str()) {}

  virtual ~FixedKVInternalNode() {}

  fixed_kv_node_meta_t<NODE_KEY> get_node_meta() const {
    return this->get_meta();
  }

  typename node_layout_t::delta_buffer_t delta_buffer;
  typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() {
    return this->is_mutation_pending()
      ? &delta_buffer : nullptr;
  }

  CachedExtentRef duplicate_for_write() override {
    assert(delta_buffer.empty());
    return CachedExtentRef(new node_type_t(*this));
  };

  void update(
    internal_const_iterator_t iter,
    paddr_t addr) {
    return this->journal_update(
      iter,
      this->maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  void insert(
    internal_const_iterator_t iter,
    NODE_KEY pivot,
    paddr_t addr) {
    return this->journal_insert(
      iter,
      pivot,
      this->maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  void remove(internal_const_iterator_t iter) {
    return this->journal_remove(
      iter,
      maybe_get_delta_buffer());
  }

  void replace(
    internal_const_iterator_t iter,
    NODE_KEY pivot,
    paddr_t addr) {
    return this->journal_replace(
      iter,
      pivot,
      this->maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  std::tuple<Ref, Ref, NODE_KEY>
  make_split_children(op_context_t<NODE_KEY> c) {
    auto left = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto right = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto pivot = this->split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  Ref make_full_merge(
    op_context_t<NODE_KEY> c,
    Ref &right) {
    auto replacement = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    replacement->merge_from(*this, *right->template cast<node_type_t>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<Ref, Ref, NODE_KEY>
  make_balanced(
    op_context_t<NODE_KEY> c,
    Ref &_right,
    bool prefer_left) {
    ceph_assert(_right->get_type() == this->get_type());
    auto &right = *_right->template cast<node_type_t>();
    auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);

    auto pivot = this->balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  /**
   * Internal relative addresses on read or in memory prior to commit
   * are either record or block relative depending on whether this
   * physical node is is_initial_pending() or just is_pending().
   *
   * User passes appropriate base depending on lifecycle and
   * resolve_relative_addrs fixes up relative internal references
   * based on base.
   */
  void resolve_relative_addrs(paddr_t base)
  {
    LOG_PREFIX(FixedKVInternalNode::resolve_relative_addrs);
    for (auto i: *this) {
      if (i->get_val().is_relative()) {
        auto updated = base.add_relative(i->get_val());
        SUBTRACE(seastore_lba_details, "{} -> {}", i->get_val(), updated);
        i->set_val(updated);
      }
    }
  }

  void node_resolve_vals(
    internal_iterator_t from,
    internal_iterator_t to) const {
    if (this->is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_block_relative());
          i->set_val(this->get_paddr().add_relative(i->get_val()));
        }
      }
    }
  }
  void node_unresolve_vals(
    internal_iterator_t from,
    internal_iterator_t to) const {
    if (this->is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_record_relative());
          i->set_val(i->get_val() - this->get_paddr());
        }
      }
    }
  }

  std::ostream &print_detail(std::ostream &out) const
  {
    return out << ", size=" << this->get_size()
               << ", meta=" << this->get_meta();
  }

  ceph::bufferlist get_delta() {
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    typename node_layout_t::delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    this->set_last_committed_crc(this->get_crc32c());
    resolve_relative_addrs(base);
  }

  constexpr static size_t get_min_capacity() {
    return (node_layout_t::get_capacity() - 1) / 2;
  }

  bool at_max_capacity() const {
    assert(this->get_size() <= node_layout_t::get_capacity());
    return this->get_size() == node_layout_t::get_capacity();
  }

  bool at_min_capacity() const {
    assert(this->get_size() >= (get_min_capacity() - 1));
    return this->get_size() <= get_min_capacity();
  }

  bool below_min_capacity() const {
    assert(this->get_size() >= (get_min_capacity() - 1));
    return this->get_size() < get_min_capacity();
  }
};

template <
  size_t CAPACITY,
  typename NODE_KEY,
  typename NODE_KEY_LE,
  typename VAL,
  typename VAL_LE,
  size_t node_size,
  typename node_type_t>
struct FixedKVLeafNode
  : FixedKVNode<NODE_KEY>,
    common::FixedKVNodeLayout<
      CAPACITY,
      fixed_kv_node_meta_t<NODE_KEY>,
      fixed_kv_node_meta_le_t<NODE_KEY_LE>,
      NODE_KEY, NODE_KEY_LE,
      VAL, VAL_LE> {
  using Ref = TCachedExtentRef<node_type_t>;
  using node_layout_t =
    common::FixedKVNodeLayout<
      CAPACITY,
      fixed_kv_node_meta_t<NODE_KEY>,
      fixed_kv_node_meta_le_t<NODE_KEY_LE>,
      NODE_KEY,
      NODE_KEY_LE,
      VAL,
      VAL_LE>;
  using internal_const_iterator_t = typename node_layout_t::const_iterator;
  template <typename... T>
  FixedKVLeafNode(T&&... t) :
    FixedKVNode<NODE_KEY>(std::forward<T>(t)...),
    node_layout_t(this->get_bptr().c_str()) {}

  virtual ~FixedKVLeafNode() {}

  fixed_kv_node_meta_t<NODE_KEY> get_node_meta() const {
    return this->get_meta();
  }

  typename node_layout_t::delta_buffer_t delta_buffer;
  virtual typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() {
    return this->is_mutation_pending() ? &delta_buffer : nullptr;
  }

  CachedExtentRef duplicate_for_write() override {
    assert(delta_buffer.empty());
    return CachedExtentRef(new node_type_t(*this));
  };

  virtual void update(
    internal_const_iterator_t iter,
    VAL val) = 0;
  virtual internal_const_iterator_t insert(
    internal_const_iterator_t iter,
    NODE_KEY addr,
    VAL val) = 0;
  virtual void remove(internal_const_iterator_t iter) = 0;

  std::tuple<Ref, Ref, NODE_KEY>
  make_split_children(op_context_t<NODE_KEY> c) {
    auto left = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto right = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto pivot = this->split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  Ref make_full_merge(
    op_context_t<NODE_KEY> c,
    Ref &right) {
    auto replacement = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    replacement->merge_from(*this, *right->template cast<node_type_t>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<Ref, Ref, NODE_KEY>
  make_balanced(
    op_context_t<NODE_KEY> c,
    Ref &_right,
    bool prefer_left) {
    ceph_assert(_right->get_type() == this->get_type());
    auto &right = *_right->template cast<node_type_t>();
    auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);
    auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
      c.trans, node_size);

    auto pivot = this->balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  ceph::bufferlist get_delta() {
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    typename node_layout_t::delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    this->set_last_committed_crc(this->get_crc32c());
    this->resolve_relative_addrs(base);
  }

  constexpr static size_t get_min_capacity() {
    return (node_layout_t::get_capacity() - 1) / 2;
  }

  bool at_max_capacity() const {
    assert(this->get_size() <= node_layout_t::get_capacity());
    return this->get_size() == node_layout_t::get_capacity();
  }

  bool at_min_capacity() const {
    assert(this->get_size() >= (get_min_capacity() - 1));
    return this->get_size() <= get_min_capacity();
  }

  bool below_min_capacity() const {
    assert(this->get_size() >= (get_min_capacity() - 1));
    return this->get_size() < get_min_capacity();
  }
};

} // namespace crimson::os::seastore
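The delta-buffer machinery above journals mutations (`journal_insert`/`journal_update`/`journal_remove`) into `delta_buffer` while a node is mutation-pending, and `apply_delta_and_adjust_crc` replays that log over the last committed image during recovery. A simplified model of that journal-and-replay idea follows; the real encoding lives in `FixedKVNodeLayout` and is assumed here, not reproduced:

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

// Mutations on a pending node are recorded as a compact op log (the delta);
// replay applies that log to a clean copy of the node, analogous to what
// apply_delta_and_adjust_crc does when a delta is read back from the journal.
enum class op_t { insert, remove };
struct delta_op_t { op_t op; uint64_t key; uint64_t val; };

struct node_model_t {
  std::map<uint64_t, uint64_t> kvs;
  void apply(const delta_op_t &d) {
    if (d.op == op_t::insert) kvs[d.key] = d.val;
    else kvs.erase(d.key);
  }
};

int main() {
  node_model_t committed{{{1, 10}, {2, 20}}};  // last on-disk state
  std::vector<delta_op_t> delta;               // journaled mutations
  delta.push_back({op_t::insert, 3, 30});
  delta.push_back({op_t::remove, 1, 0});

  node_model_t replayed = committed;           // replay onto the clean copy
  for (const auto &d : delta) replayed.apply(d);
  assert(replayed.kvs.count(3) && !replayed.kvs.count(1));
}
```

This is why `duplicate_for_write` asserts `delta_buffer.empty()`: the mutable copy must start from the committed image with no unreplayed ops.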
@@ -81,7 +81,7 @@ std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const

 std::ostream &operator<<(std::ostream &out, const LBAPin &rhs)
 {
-  return out << "LBAPin(" << rhs.get_laddr() << "~" << rhs.get_length()
+  return out << "LBAPin(" << rhs.get_key() << "~" << rhs.get_length()
             << "->" << rhs.get_paddr();
 }

@@ -666,20 +666,30 @@ private:
 };

 class LogicalCachedExtent;
-class LBAPin;
-using LBAPinRef = std::unique_ptr<LBAPin>;
-class LBAPin {
+
+template <typename key_t>
+class PhysicalNodePin;
+
+template <typename key_t>
+using PhysicalNodePinRef = std::unique_ptr<PhysicalNodePin<key_t>>;
+
+template <typename key_t>
+class PhysicalNodePin {
 public:
   virtual void link_extent(LogicalCachedExtent *ref) = 0;
-  virtual void take_pin(LBAPin &pin) = 0;
+  virtual void take_pin(PhysicalNodePin<key_t> &pin) = 0;
   virtual extent_len_t get_length() const = 0;
   virtual paddr_t get_paddr() const = 0;
-  virtual laddr_t get_laddr() const = 0;
-  virtual LBAPinRef duplicate() const = 0;
+  virtual key_t get_key() const = 0;
+  virtual PhysicalNodePinRef<key_t> duplicate() const = 0;
   virtual bool has_been_invalidated() const = 0;

-  virtual ~LBAPin() {}
+  virtual ~PhysicalNodePin() {}
 };

+using LBAPin = PhysicalNodePin<laddr_t>;
+using LBAPinRef = PhysicalNodePinRef<laddr_t>;
+
 std::ostream &operator<<(std::ostream &out, const LBAPin &rhs);

 using lba_pin_list_t = std::list<LBAPinRef>;
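The hunk above generalizes the pin interface from the LBA-specific `LBAPin` to `PhysicalNodePin<key_t>`, keeping `LBAPin`/`LBAPinRef` as aliases so existing call sites continue to compile. A compressed sketch of that refactoring pattern; all names here are illustrative stand-ins, not the seastore types:

```cpp
#include <cstdint>
#include <memory>

using laddr_t = uint64_t;  // stand-in for the real seastore key type

// The interface is parameterized on the key type...
template <typename key_t>
class PinInterface {
public:
  virtual key_t get_key() const = 0;
  virtual ~PinInterface() = default;
};

template <typename key_t>
using PinInterfaceRef = std::unique_ptr<PinInterface<key_t>>;

// ...and the domain-specific names become aliases, so old code keeps working.
using LBAPinModel = PinInterface<laddr_t>;
using LBAPinModelRef = PinInterfaceRef<laddr_t>;

class FixedPin final : public LBAPinModel {
  laddr_t k;
public:
  explicit FixedPin(laddr_t k) : k(k) {}
  laddr_t get_key() const override { return k; }
};

int main() {
  LBAPinModelRef p = std::make_unique<FixedPin>(42);
  return p->get_key() == 42 ? 0 : 1;
}
```

The same aliasing is what lets `get_laddr()` collapse into the generic `get_key()` in the following hunk.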
@@ -756,7 +766,7 @@ public:
   void set_pin(LBAPinRef &&npin) {
     assert(!pin);
     pin = std::move(npin);
-    laddr = pin->get_laddr();
+    laddr = pin->get_key();
     pin->link_extent(this);
   }

@@ -9,7 +9,6 @@
 #include "include/buffer.h"
 #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
 #include "crimson/os/seastore/logging.h"

 SET_SUBSYS(seastore_lba);
@@ -20,6 +19,22 @@ SET_SUBSYS(seastore_lba);
  * - TRACE: read operations, DEBUG details
  */

+namespace crimson::os::seastore {
+
+template<>
+Transaction::tree_stats_t& get_tree_stats<
+  crimson::os::seastore::lba_manager::btree::LBABtree>(Transaction &t) {
+  return t.get_lba_tree_stats();
+}
+
+template<>
+phy_tree_root_t& get_phy_tree_root<
+  crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r) {
+  return r.lba_root;
+}
+
+}
+
 namespace crimson::os::seastore::lba_manager::btree {

 BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
@@ -46,7 +61,8 @@ BtreeLBAManager::get_mappings(
   LOG_PREFIX(BtreeLBAManager::get_mappings);
   TRACET("{}~{}", t, offset, length);
   auto c = get_context(t);
-  return with_btree_state<lba_pin_list_t>(
+  return with_btree_state<LBABtree, lba_pin_list_t>(
+    cache,
     c,
     [c, offset, length, FNAME](auto &btree, auto &ret) {
       return LBABtree::iterate_repeat(
@@ -104,7 +120,8 @@ BtreeLBAManager::get_mapping(
   LOG_PREFIX(BtreeLBAManager::get_mapping);
   TRACET("{}", t, offset);
   auto c = get_context(t);
-  return with_btree_ret<LBAPinRef>(
+  return with_btree_ret<LBABtree, LBAPinRef>(
+    cache,
     c,
     [FNAME, c, offset](auto &btree) {
       return btree.lower_bound(
@@ -147,7 +164,8 @@ BtreeLBAManager::alloc_extent(
   auto c = get_context(t);
   ++stats.num_alloc_extents;
   auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
-  return with_btree_state<state_t>(
+  return crimson::os::seastore::with_btree_state<LBABtree, state_t>(
+    cache,
     c,
     hint,
     [this, FNAME, c, hint, len, addr, lookup_attempts, &t](auto &btree, auto &state) {
@@ -210,13 +228,13 @@ static bool is_lba_node(const CachedExtent &e)
   return is_lba_node(e.get_type());
 }

-btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
+btree_range_pin_t<laddr_t> &BtreeLBAManager::get_pin(CachedExtent &e)
 {
   if (is_lba_node(e)) {
     return e.cast<LBANode>()->pin;
   } else if (e.is_logical()) {
     return static_cast<BtreeLBAPin &>(
-      e.cast<LogicalCachedExtent>()->get_pin()).pin;
+      e.cast<LogicalCachedExtent>()->get_pin()).get_range_pin();
   } else {
     ceph_abort_msg("impossible");
   }
@@ -280,23 +298,57 @@ void BtreeLBAManager::complete_transaction(
   }
 }

+BtreeLBAManager::base_iertr::future<> _init_cached_extent(
+  op_context_t<laddr_t> c,
+  const CachedExtentRef &e,
+  LBABtree &btree,
+  bool &ret)
+{
+  if (e->is_logical()) {
+    auto logn = e->cast<LogicalCachedExtent>();
+    return btree.lower_bound(
+      c,
+      logn->get_laddr()
+    ).si_then([e, c, logn, &ret](auto iter) {
+      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+      if (!iter.is_end() &&
+          iter.get_key() == logn->get_laddr() &&
+          iter.get_val().paddr == logn->get_paddr()) {
+        logn->set_pin(iter.get_pin());
+        ceph_assert(iter.get_val().len == e->get_length());
+        if (c.pins) {
+          c.pins->add_pin(
+            static_cast<BtreeLBAPin&>(logn->get_pin()).get_range_pin());
+        }
+        DEBUGT("logical extent {} live", c.trans, *logn);
+        ret = true;
+      } else {
+        DEBUGT("logical extent {} not live", c.trans, *logn);
+        ret = false;
+      }
+    });
+  } else {
+    return btree.init_cached_extent(c, e
+    ).si_then([&ret](bool is_alive) {
+      ret = is_alive;
+    });
+  }
+}
+
 BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
   Transaction &t,
   CachedExtentRef e)
 {
-  LOG_PREFIX(BtreeLBAManager::init_cached_extent);
-  TRACET("{}", t, *e);
-  return seastar::do_with(bool(), [this, e, FNAME, &t](bool& ret) {
+  return seastar::do_with(bool(), [this, e, &t](bool &ret) {
     auto c = get_context(t);
-    return with_btree(c, [c, e, &ret](auto &btree) {
-      return btree.init_cached_extent(c, e
-      ).si_then([&ret](bool is_alive) {
-        ret = is_alive;
-      });
-    }).si_then([&ret, e, FNAME, c] {
-      DEBUGT("is_alive={} -- {}", c.trans, ret, *e);
-      return ret;
-    });
+    return with_btree<LBABtree>(cache, c, [c, e, &ret](auto &btree)
+      -> base_iertr::future<> {
+      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+      DEBUGT("extent {}", c.trans, *e);
+      return _init_cached_extent(c, e, btree, ret);
+    }).si_then([&ret] { return ret; });
   });
 }

@@ -310,7 +362,8 @@ BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
   DEBUGT("begin: {}, end: {}", t, begin, end);

   auto c = get_context(t);
-  return with_btree(
+  return with_btree<LBABtree>(
+    cache,
     c,
     [c, f=std::move(f), begin, end](auto &btree) mutable {
       return LBABtree::iterate_repeat(
@@ -341,7 +394,8 @@ BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space(
   return seastar::do_with(
     std::move(f),
     [this, c](auto &visitor) {
-      return with_btree(
+      return with_btree<LBABtree>(
+        cache,
         c,
         [c, &visitor](auto &btree) {
           return LBABtree::iterate_repeat(
@@ -377,10 +431,11 @@ BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
   if (is_lba_node(*extent)) {
     DEBUGT("rewriting lba extent -- {}", t, *extent);
     auto c = get_context(t);
-    return with_btree(
+    return with_btree<LBABtree>(
+      cache,
       c,
       [c, extent](auto &btree) mutable {
-        return btree.rewrite_lba_extent(c, extent);
+        return btree.rewrite_extent(c, extent);
       });
   } else {
     DEBUGT("skip non lba extent -- {}", t, *extent);
@@ -433,7 +488,8 @@ BtreeLBAManager::get_physical_extent_if_live(
     t, type, laddr, addr, len);
   ceph_assert(is_lba_node(type));
   auto c = get_context(t);
-  return with_btree_ret<CachedExtentRef>(
+  return with_btree_ret<LBABtree, CachedExtentRef>(
+    cache,
     c,
     [c, type, addr, laddr, len](auto &btree) {
       if (type == extent_types_t::LADDR_INTERNAL) {
@@ -509,7 +565,8 @@ BtreeLBAManager::_update_mapping_ret BtreeLBAManager::_update_mapping(
   update_func_t &&f)
 {
   auto c = get_context(t);
-  return with_btree_ret<lba_map_val_t>(
+  return with_btree_ret<LBABtree, lba_map_val_t>(
+    cache,
     c,
     [f=std::move(f), c, addr](auto &btree) mutable {
       return btree.lower_bound(
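The repeated change in this file threads the tree type and the cache through the `with_btree`/`with_btree_state`/`with_btree_ret` helpers, which now live in fixed_kv_btree.h as free templates. A synchronous toy model of the `with_btree_state` shape; the real helpers return interruptible seastar futures, and everything below is a hypothetical stand-in:

```cpp
#include <cassert>
#include <utility>

// Construct state with a stable lifetime (the role seastar::do_with plays),
// run the functor against the tree and the state, then hand the state back
// as the operation's result.
struct btree_model_t { int lookups = 0; };

template <typename State, typename F>
State with_btree_state_model(btree_model_t &btree, State init, F &&f) {
  State state = std::move(init);   // state outlives the functor invocation
  std::forward<F>(f)(btree, state);
  return state;                    // the "future" resolves to the state
}

int main() {
  btree_model_t btree;
  auto hits = with_btree_state_model<int>(btree, 0, [](auto &bt, int &count) {
    ++bt.lookups;                  // functor may mutate both tree and state
    count += 2;
  });
  assert(hits == 2 && btree.lookups == 1);
}
```

Making the tree a template parameter is what allows the same helpers to serve both the LBA tree and future fixed-kv trees, which is the point of this extraction.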
@@ -15,16 +15,23 @@
 #include "common/interval_map.h"
 #include "crimson/osd/exceptions.h"

+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/lba_manager.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/segment_manager.h"

 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
+#include "crimson/os/seastore/btree/btree_range_pin.h"

 namespace crimson::os::seastore::lba_manager::btree {

+using LBABtree = FixedKVBtree<
+  laddr_t, lba_map_val_t, LBAInternalNode,
+  LBALeafNode, LBA_BLOCK_SIZE>;
+
+using BtreeLBAPin = BtreeNodePin<laddr_t>;
+
 /**
  * BtreeLBAManager
  *
@@ -84,6 +91,14 @@ public:
   void complete_transaction(
     Transaction &t) final;

+  /**
+   * init_cached_extent
+   *
+   * Checks whether e is live (reachable from lba tree) and drops or initializes
+   * accordingly.
+   *
+   * Returns if e is live.
+   */
   init_cached_extent_ret init_cached_extent(
     Transaction &t,
     CachedExtentRef e) final;
@@ -117,8 +132,8 @@ public:

   void add_pin(LBAPin &pin) final {
     auto *bpin = reinterpret_cast<BtreeLBAPin*>(&pin);
-    pin_set.add_pin(bpin->pin);
-    bpin->parent = nullptr;
+    pin_set.add_pin(bpin->get_range_pin());
+    bpin->set_parent(nullptr);
   }

   ~BtreeLBAManager();
@@ -126,84 +141,21 @@ private:
   SegmentManager &segment_manager;
   Cache &cache;

-  btree_pin_set_t pin_set;
+  btree_pin_set_t<laddr_t> pin_set;

   struct {
     uint64_t num_alloc_extents = 0;
     uint64_t num_alloc_extents_iter_nexts = 0;
   } stats;

-  op_context_t get_context(Transaction &t) {
-    return op_context_t{cache, t, &pin_set};
+  op_context_t<laddr_t> get_context(Transaction &t) {
+    return op_context_t<laddr_t>{cache, t, &pin_set};
   }

-  static btree_range_pin_t &get_pin(CachedExtent &e);
+  static btree_range_pin_t<laddr_t> &get_pin(CachedExtent &e);

   seastar::metrics::metric_group metrics;
   void register_metrics();
-  template <typename F, typename... Args>
-  auto with_btree(
-    op_context_t c,
-    F &&f) {
-    return cache.get_root(
-      c.trans
-    ).si_then([this, c, f=std::forward<F>(f)](RootBlockRef croot) mutable {
-      return seastar::do_with(
-        LBABtree(croot->get_root().lba_root),
-        [this, c, croot, f=std::move(f)](auto &btree) mutable {
-          return f(
-            btree
-          ).si_then([this, c, croot, &btree] {
-            if (btree.is_root_dirty()) {
-              auto mut_croot = cache.duplicate_for_write(
-                c.trans, croot
-              )->cast<RootBlock>();
-              mut_croot->get_root().lba_root = btree.get_root_undirty();
-            }
-            return base_iertr::now();
-          });
-        });
-    });
-  }
-
-  template <typename State, typename F>
-  auto with_btree_state(
-    op_context_t c,
-    State &&init,
-    F &&f) {
-    return seastar::do_with(
-      std::forward<State>(init),
-      [this, c, f=std::forward<F>(f)](auto &state) mutable {
-        (void)this; // silence incorrect clang warning about capture
-        return with_btree(c, [&state, f=std::move(f)](auto &btree) mutable {
-          return f(btree, state);
-        }).si_then([&state] {
-          return seastar::make_ready_future<State>(std::move(state));
-        });
-      });
-  }
-
-  template <typename State, typename F>
-  auto with_btree_state(
-    op_context_t c,
-    F &&f) {
-    return with_btree_state<State, F>(c, State{}, std::forward<F>(f));
-  }
-
-  template <typename Ret, typename F>
-  auto with_btree_ret(
-    op_context_t c,
-    F &&f) {
-    return with_btree_state<Ret>(
-      c,
-      [f=std::forward<F>(f)](auto &btree, auto &ret) mutable {
-        return f(
-          btree
-        ).si_then([&ret](auto &&_ret) {
-          ret = std::move(_ret);
-        });
-      });
-  }
-
   /**
    * update_refcount
@@ -1,155 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
#include "crimson/os/seastore/logging.h"

SET_SUBSYS(seastore_lba);

namespace crimson::os::seastore::lba_manager::btree {

void btree_range_pin_t::take_pin(btree_range_pin_t &other)
{
  ceph_assert(other.extent);
  if (other.pins) {
    other.pins->replace_pin(*this, other);
    pins = other.pins;
    other.pins = nullptr;

    if (other.has_ref()) {
      other.drop_ref();
      acquire_ref();
    }
  }
}

btree_range_pin_t::~btree_range_pin_t()
{
  LOG_PREFIX(btree_range_pin_t::~btree_range_pin_t);
  ceph_assert(!pins == !is_linked());
  ceph_assert(!ref);
  if (pins) {
    TRACE("removing {}", *this);
    pins->remove_pin(*this, true);
  }
  extent = nullptr;
}

void btree_pin_set_t::replace_pin(btree_range_pin_t &to, btree_range_pin_t &from)
{
  pins.replace_node(pins.iterator_to(from), to);
}

void btree_pin_set_t::remove_pin(btree_range_pin_t &pin, bool do_check_parent)
{
  LOG_PREFIX(btree_pin_set_t::remove_pin);
  TRACE("{}", pin);
  ceph_assert(pin.is_linked());
  ceph_assert(pin.pins);
  ceph_assert(!pin.ref);

  pins.erase(pin);
  pin.pins = nullptr;

  if (do_check_parent) {
    check_parent(pin);
  }
}

btree_range_pin_t *btree_pin_set_t::maybe_get_parent(
  const lba_node_meta_t &meta)
{
  auto cmeta = meta;
  cmeta.depth++;
  auto iter = pins.upper_bound(cmeta, btree_range_pin_t::meta_cmp_t());
  if (iter == pins.begin()) {
    return nullptr;
  } else {
    --iter;
    if (iter->range.is_parent_of(meta)) {
      return &*iter;
    } else {
      return nullptr;
    }
  }
}

const btree_range_pin_t *btree_pin_set_t::maybe_get_first_child(
  const lba_node_meta_t &meta) const
{
  if (meta.depth == 0) {
    return nullptr;
  }

  auto cmeta = meta;
  cmeta.depth--;

  auto iter = pins.lower_bound(cmeta, btree_range_pin_t::meta_cmp_t());
  if (iter == pins.end()) {
    return nullptr;
  } else if (meta.is_parent_of(iter->range)) {
    return &*iter;
  } else {
    return nullptr;
  }
}

void btree_pin_set_t::release_if_no_children(btree_range_pin_t &pin)
{
  ceph_assert(pin.is_linked());
  if (maybe_get_first_child(pin.range) == nullptr) {
    pin.drop_ref();
  }
}

void btree_pin_set_t::add_pin(btree_range_pin_t &pin)
{
  LOG_PREFIX(btree_pin_set_t::add_pin);
  ceph_assert(!pin.is_linked());
  ceph_assert(!pin.pins);
  ceph_assert(!pin.ref);

  auto [prev, inserted] = pins.insert(pin);
  if (!inserted) {
    ERROR("unable to add {} ({}), found {} ({})",
          pin,
          *(pin.extent),
          *prev,
          *(prev->extent));
    ceph_assert(0 == "impossible");
    return;
  }
  pin.pins = this;
  if (!pin.is_root()) {
    auto *parent = maybe_get_parent(pin.range);
    ceph_assert(parent);
    if (!parent->has_ref()) {
      TRACE("acquiring parent {}", static_cast<void*>(parent));
      parent->acquire_ref();
    } else {
      TRACE("parent has ref {}", static_cast<void*>(parent));
    }
  }
  if (maybe_get_first_child(pin.range) != nullptr) {
    TRACE("acquiring self {}", pin);
    pin.acquire_ref();
  }
}

void btree_pin_set_t::retire(btree_range_pin_t &pin)
{
  pin.drop_ref();
  remove_pin(pin, false);
}

void btree_pin_set_t::check_parent(btree_range_pin_t &pin)
{
  LOG_PREFIX(btree_pin_set_t::check_parent);
  auto parent = maybe_get_parent(pin.range);
  if (parent) {
    TRACE("releasing parent {}", *parent);
    release_if_no_children(*parent);
  }
}

}
@@ -1,292 +0,0 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <boost/intrusive/set.hpp>

#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/seastore_types.h"

namespace crimson::os::seastore::lba_manager::btree {

class LBANode;
using LBANodeRef = TCachedExtentRef<LBANode>;

struct lba_node_meta_t {
  laddr_t begin = 0;
  laddr_t end = 0;
  depth_t depth = 0;

  bool is_parent_of(const lba_node_meta_t &other) const {
    return (depth == other.depth + 1) &&
      (begin <= other.begin) &&
      (end > other.begin);
  }

  std::pair<lba_node_meta_t, lba_node_meta_t> split_into(laddr_t pivot) const {
    return std::make_pair(
      lba_node_meta_t{begin, pivot, depth},
      lba_node_meta_t{pivot, end, depth});
  }

  static lba_node_meta_t merge_from(
    const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
    ceph_assert(lhs.depth == rhs.depth);
    return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth};
  }

  static std::pair<lba_node_meta_t, lba_node_meta_t>
  rebalance(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs, laddr_t pivot) {
    ceph_assert(lhs.depth == rhs.depth);
    return std::make_pair(
      lba_node_meta_t{lhs.begin, pivot, lhs.depth},
      lba_node_meta_t{pivot, rhs.end, lhs.depth});
  }

  bool is_root() const {
    return begin == 0 && end == L_ADDR_MAX;
  }
};

inline std::ostream &operator<<(
  std::ostream &lhs,
  const lba_node_meta_t &rhs)
{
  return lhs << "btree_node_meta_t("
             << "begin=" << rhs.begin
             << ", end=" << rhs.end
             << ", depth=" << rhs.depth
             << ")";
}

/**
 * btree_range_pin_t
 *
 * Element tracked by btree_pin_set_t below.  Encapsulates the intrusive_set
 * hook, the lba_node_meta_t representing the lba range covered by a node,
 * and extent and ref members intended to hold a reference when the extent
 * should be pinned.
 */
class btree_pin_set_t;
class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
  friend class btree_pin_set_t;
  lba_node_meta_t range;

  btree_pin_set_t *pins = nullptr;

  // We need to be able to remember extent without holding a reference,
  // but we can do it more compactly -- TODO
  CachedExtent *extent = nullptr;
  CachedExtentRef ref;

  using index_t = boost::intrusive::set<btree_range_pin_t>;

  static auto get_tuple(const lba_node_meta_t &meta) {
    return std::make_tuple(-meta.depth, meta.begin);
  }

  void acquire_ref() {
    ref = CachedExtentRef(extent);
  }

  void drop_ref() {
    ref.reset();
  }

public:
  btree_range_pin_t() = default;
  btree_range_pin_t(CachedExtent *extent)
    : extent(extent) {}
  btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
    : range(rhs.range), extent(extent) {}

  bool has_ref() const {
    return !!ref;
  }

  bool is_root() const {
    return range.is_root();
  }

  void set_range(const lba_node_meta_t &nrange) {
    range = nrange;
  }
  void set_extent(CachedExtent *nextent) {
    ceph_assert(!extent);
    extent = nextent;
  }

  CachedExtent &get_extent() {
    assert(extent);
    return *extent;
  }

  bool has_ref() {
    return !!ref;
  }

  void take_pin(btree_range_pin_t &other);

  friend bool operator<(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) < get_tuple(rhs.range);
  }
  friend bool operator>(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) > get_tuple(rhs.range);
  }
  friend bool operator==(
    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
    return get_tuple(lhs.range) == rhs.get_tuple(rhs.range);
  }

  struct meta_cmp_t {
    bool operator()(
      const btree_range_pin_t &lhs, const lba_node_meta_t &rhs) const {
      return get_tuple(lhs.range) < get_tuple(rhs);
    }
    bool operator()(
      const lba_node_meta_t &lhs, const btree_range_pin_t &rhs) const {
      return get_tuple(lhs) < get_tuple(rhs.range);
    }
  };

  friend std::ostream &operator<<(
    std::ostream &lhs,
    const btree_range_pin_t &rhs) {
    return lhs << "btree_range_pin_t("
               << "begin=" << rhs.range.begin
               << ", end=" << rhs.range.end
               << ", depth=" << rhs.range.depth
               << ", extent=" << rhs.extent
               << ")";
  }

  friend class BtreeLBAPin;
  ~btree_range_pin_t();
};

/**
 * btree_pin_set_t
 *
 * Ensures that for every cached node, all parent LBANodes required
 * to map it are present in cache.  Relocating these nodes can
 * therefore be done without further reads or cache space.
 *
 * Contains a btree_range_pin_t for every clean or dirty LBANode
 * or LogicalCachedExtent instance in cache at any point in time.
 * For any LBANode, the contained btree_range_pin_t will hold
 * a reference to that node pinning it in cache as long as that
 * node has children in the set.  This invariant can be violated
 * only by calling retire_extent and is repaired by calling
 * check_parent synchronously after adding any new extents.
 */
class btree_pin_set_t {
  friend class btree_range_pin_t;
  using pins_t = btree_range_pin_t::index_t;
  pins_t pins;

  /// Removes pin from set optionally checking whether parent has other children
  void remove_pin(btree_range_pin_t &pin, bool check_parent);

  void replace_pin(btree_range_pin_t &to, btree_range_pin_t &from);

  /// Returns parent pin if exists
  btree_range_pin_t *maybe_get_parent(const lba_node_meta_t &pin);

  /// Returns earliest child pin if exist
  const btree_range_pin_t *maybe_get_first_child(const lba_node_meta_t &pin) const;

  /// Releases pin if it has no children
  void release_if_no_children(btree_range_pin_t &pin);

public:
  /// Adds pin to set, assumes set is consistent
  void add_pin(btree_range_pin_t &pin);

  /**
   * retire/check_parent
   *
   * See BtreeLBAManager::complete_transaction.
   * retire removes the specified pin from the set, but does not
   * check parents.  After any new extents are added to the set,
   * the caller is required to call check_parent to restore the
   * invariant.
   */
  void retire(btree_range_pin_t &pin);
  void check_parent(btree_range_pin_t &pin);

  template <typename F>
  void scan(F &&f) {
    for (auto &i : pins) {
      std::invoke(f, i);
    }
  }

  ~btree_pin_set_t() {
    ceph_assert(pins.empty());
  }
};

class BtreeLBAPin : public LBAPin {
  friend class BtreeLBAManager;
  friend class LBABtree;

  /**
   * parent
   *
   * populated until link_extent is called to ensure cache residence
   * until add_pin is called.
   */
  CachedExtentRef parent;

  paddr_t paddr;
  btree_range_pin_t pin;

public:
  BtreeLBAPin() = default;

  BtreeLBAPin(
    CachedExtentRef parent,
    paddr_t paddr,
    lba_node_meta_t &&meta)
    : parent(parent), paddr(paddr) {
    pin.set_range(std::move(meta));
  }

  void link_extent(LogicalCachedExtent *ref) final {
    pin.set_extent(ref);
  }

  extent_len_t get_length() const final {
    ceph_assert(pin.range.end > pin.range.begin);
    return pin.range.end - pin.range.begin;
  }

  paddr_t get_paddr() const final {
    return paddr;
  }

  laddr_t get_laddr() const final {
    return pin.range.begin;
  }

  LBAPinRef duplicate() const final {
    auto ret = std::unique_ptr<BtreeLBAPin>(new BtreeLBAPin);
    ret->pin.set_range(pin.range);
    ret->paddr = paddr;
    ret->parent = parent;
    return ret;
  }

  void take_pin(LBAPin &opin) final {
    pin.take_pin(static_cast<BtreeLBAPin&>(opin).pin);
  }

  bool has_been_invalidated() const final {
    return parent->has_been_invalidated();
  }
};

}
File diff suppressed because it is too large
@ -1,702 +0,0 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#pragma once

#include <boost/container/static_vector.hpp>
#include <sys/mman.h>
#include <memory>
#include <string.h>

#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"

namespace crimson::os::seastore::lba_manager::btree {


class LBABtree {
  static constexpr size_t MAX_DEPTH = 16;
public:
  using base_iertr = LBAManager::base_iertr;

  class iterator;
  using iterator_fut = base_iertr::future<iterator>;

  using mapped_space_visitor_t = LBAManager::scan_mapped_space_func_t;

  class iterator {
  public:
    iterator(const iterator &rhs) noexcept :
      internal(rhs.internal), leaf(rhs.leaf) {}
    iterator(iterator &&rhs) noexcept :
      internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}

    iterator &operator=(const iterator &) = default;
    iterator &operator=(iterator &&) = default;

    iterator_fut next(
      op_context_t c,
      mapped_space_visitor_t *visit=nullptr) const;

    iterator_fut prev(op_context_t c) const;

    void assert_valid() const {
      assert(leaf.node);
      assert(leaf.pos <= leaf.node->get_size());

      for (auto &i: internal) {
        (void)i;
        assert(i.node);
        assert(i.pos < i.node->get_size());
      }
    }

    depth_t get_depth() const {
      return internal.size() + 1;
    }

    auto &get_internal(depth_t depth) {
      assert(depth > 1);
      assert((depth - 2) < internal.size());
      return internal[depth - 2];
    }

    const auto &get_internal(depth_t depth) const {
      assert(depth > 1);
      assert((depth - 2) < internal.size());
      return internal[depth - 2];
    }

    laddr_t get_key() const {
      assert(!is_end());
      return leaf.node->iter_idx(leaf.pos).get_key();
    }
    lba_map_val_t get_val() const {
      assert(!is_end());
      auto ret = leaf.node->iter_idx(leaf.pos).get_val();
      ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
      return ret;
    }

    bool is_end() const {
      // external methods may only resolve at a boundary if at end
      return at_boundary();
    }

    bool is_begin() const {
      for (auto &i: internal) {
        if (i.pos != 0)
          return false;
      }
      return leaf.pos == 0;
    }

    LBAPinRef get_pin() const {
      assert(!is_end());
      auto val = get_val();
      auto key = get_key();
      return std::make_unique<BtreeLBAPin>(
        leaf.node,
        val.paddr,
        lba_node_meta_t{ key, key + val.len, 0 });
    }

  private:
    iterator() noexcept {}
    iterator(depth_t depth) noexcept : internal(depth - 1) {}

    friend class LBABtree;
    static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
    template <typename NodeType>
    struct node_position_t {
      typename NodeType::Ref node;
      uint16_t pos = INVALID;

      void reset() {
        *this = node_position_t{};
      }

      auto get_iter() {
        assert(pos != INVALID);
        assert(pos < node->get_size());
        return node->iter_idx(pos);
      }
    };
    boost::container::static_vector<
      node_position_t<LBAInternalNode>, MAX_DEPTH> internal;
    node_position_t<LBALeafNode> leaf;

    bool at_boundary() const {
      assert(leaf.pos <= leaf.node->get_size());
      return leaf.pos == leaf.node->get_size();
    }

    using handle_boundary_ertr = base_iertr;
    using handle_boundary_ret = handle_boundary_ertr::future<>;
    handle_boundary_ret handle_boundary(
      op_context_t c,
      mapped_space_visitor_t *visitor);

    depth_t check_split() const {
      if (!leaf.node->at_max_capacity()) {
        return 0;
      }
      for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
        if (!get_internal(split_from + 1).node->at_max_capacity())
          return split_from;
      }
      return get_depth();
    }

    depth_t check_merge() const {
      if (!leaf.node->below_min_capacity()) {
        return 0;
      }
      for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
        if (!get_internal(merge_from + 1).node->below_min_capacity())
          return merge_from;
      }
      return get_depth();
    }
  };

  LBABtree(lba_root_t root) : root(root) {}

  bool is_root_dirty() const {
    return root_dirty;
  }
  lba_root_t get_root_undirty() {
    ceph_assert(root_dirty);
    root_dirty = false;
    return root;
  }

  /// mkfs
  using mkfs_ret = lba_root_t;
  static mkfs_ret mkfs(op_context_t c);

  /**
   * lower_bound
   *
   * @param c [in] context
   * @param addr [in] addr
   * @return least iterator >= key
   */
  iterator_fut lower_bound(
    op_context_t c,
    laddr_t addr,
    mapped_space_visitor_t *visit=nullptr) const;

  /**
   * upper_bound
   *
   * @param c [in] context
   * @param addr [in] addr
   * @return least iterator > key
   */
  iterator_fut upper_bound(
    op_context_t c,
    laddr_t addr
  ) const {
    return lower_bound(
      c, addr
    ).si_then([c, addr](auto iter) {
      if (!iter.is_end() && iter.get_key() == addr) {
        return iter.next(c);
      } else {
        return iterator_fut(
          interruptible::ready_future_marker{},
          iter);
      }
    });
  }

  /**
   * upper_bound_right
   *
   * @param c [in] context
   * @param addr [in] addr
   * @return least iterator i s.t. i.get_key() + i.get_val().len > key
   */
  iterator_fut upper_bound_right(
    op_context_t c,
    laddr_t addr) const
  {
    return lower_bound(
      c, addr
    ).si_then([c, addr](auto iter) {
      if (iter.is_begin()) {
        return iterator_fut(
          interruptible::ready_future_marker{},
          iter);
      } else {
        return iter.prev(
          c
        ).si_then([iter, addr](auto prev) {
          if ((prev.get_key() + prev.get_val().len) > addr) {
            return iterator_fut(
              interruptible::ready_future_marker{},
              prev);
          } else {
            return iterator_fut(
              interruptible::ready_future_marker{},
              iter);
          }
        });
      }
    });
  }

  iterator_fut begin(op_context_t c) const {
    return lower_bound(c, 0);
  }
  iterator_fut end(op_context_t c) const {
    return upper_bound(c, L_ADDR_MAX);
  }

  using iterate_repeat_ret_inner = base_iertr::future<
    seastar::stop_iteration>;
  template <typename F>
  static base_iertr::future<> iterate_repeat(
    op_context_t c,
    iterator_fut &&iter_fut,
    F &&f,
    mapped_space_visitor_t *visitor=nullptr) {
    return std::move(
      iter_fut
    ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
      return seastar::do_with(
        iter,
        std::move(f),
        [c, visitor](auto &pos, auto &f) {
          return trans_intr::repeat(
            [c, visitor, &f, &pos] {
              return f(
                pos
              ).si_then([c, visitor, &pos](auto done) {
                if (done == seastar::stop_iteration::yes) {
                  return iterate_repeat_ret_inner(
                    interruptible::ready_future_marker{},
                    seastar::stop_iteration::yes);
                } else {
                  ceph_assert(!pos.is_end());
                  return pos.next(
                    c, visitor
                  ).si_then([&pos](auto next) {
                    pos = next;
                    return iterate_repeat_ret_inner(
                      interruptible::ready_future_marker{},
                      seastar::stop_iteration::no);
                  });
                }
              });
            });
        });
    });
  }

  /**
   * insert
   *
   * Inserts val at laddr with iter as a hint. If an element at laddr already
   * exists, returns an iterator to that element unchanged and returns false.
   *
   * Invalidates all outstanding iterators for this tree on this transaction.
   *
   * @param c [in] op context
   * @param iter [in] hint; insertion is constant-time if the insertion point
   *        is immediately prior to iter
   * @param laddr [in] addr at which to insert
   * @param val [in] val to insert
   * @return pair<iter, bool> where iter points to element at addr, bool true
   *         iff element at laddr did not exist.
   */
  using insert_iertr = base_iertr;
  using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
  insert_ret insert(
    op_context_t c,
    iterator iter,
    laddr_t laddr,
    lba_map_val_t val
  );
  insert_ret insert(
    op_context_t c,
    laddr_t laddr,
    lba_map_val_t val) {
    return lower_bound(
      c, laddr
    ).si_then([this, c, laddr, val](auto iter) {
      return insert(c, iter, laddr, val);
    });
  }

  /**
   * update
   *
   * Invalidates all outstanding iterators for this tree on this transaction.
   *
   * @param c [in] op context
   * @param iter [in] iterator to element to update, must not be end
   * @param val [in] val with which to update
   * @return iterator to newly updated element
   */
  using update_iertr = base_iertr;
  using update_ret = update_iertr::future<iterator>;
  update_ret update(
    op_context_t c,
    iterator iter,
    lba_map_val_t val);

  /**
   * remove
   *
   * Invalidates all outstanding iterators for this tree on this transaction.
   *
   * @param c [in] op context
   * @param iter [in] iterator to element to remove, must not be end
   */
  using remove_iertr = base_iertr;
  using remove_ret = remove_iertr::future<>;
  remove_ret remove(
    op_context_t c,
    iterator iter);

  /**
   * init_cached_extent
   *
   * Checks whether e is live (reachable from the lba tree) and drops or
   * initializes it accordingly.
   *
   * Returns whether e is live.
   */
  using init_cached_extent_iertr = base_iertr;
  using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
  init_cached_extent_ret init_cached_extent(op_context_t c, CachedExtentRef e);

  /// get_leaf_if_live: get leaf node at laddr/addr if still live
  using get_leaf_if_live_iertr = base_iertr;
  using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
  get_leaf_if_live_ret get_leaf_if_live(
    op_context_t c,
    paddr_t addr,
    laddr_t laddr,
    seastore_off_t len);

  /// get_internal_if_live: get internal node at laddr/addr if still live
  using get_internal_if_live_iertr = base_iertr;
  using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
  get_internal_if_live_ret get_internal_if_live(
    op_context_t c,
    paddr_t addr,
    laddr_t laddr,
    seastore_off_t len);

  /**
   * rewrite_lba_extent
   *
   * Rewrites a fresh copy of extent into transaction and updates internal
   * references.
   */
  using rewrite_lba_extent_iertr = base_iertr;
  using rewrite_lba_extent_ret = rewrite_lba_extent_iertr::future<>;
  rewrite_lba_extent_ret rewrite_lba_extent(op_context_t c, CachedExtentRef e);

private:
  lba_root_t root;
  bool root_dirty = false;

  using get_internal_node_iertr = base_iertr;
  using get_internal_node_ret = get_internal_node_iertr::future<LBAInternalNodeRef>;
  static get_internal_node_ret get_internal_node(
    op_context_t c,
    depth_t depth,
    paddr_t offset,
    laddr_t begin,
    laddr_t end);

  using get_leaf_node_iertr = base_iertr;
  using get_leaf_node_ret = get_leaf_node_iertr::future<LBALeafNodeRef>;
  static get_leaf_node_ret get_leaf_node(
    op_context_t c,
    paddr_t offset,
    laddr_t begin,
    laddr_t end);

  using lookup_root_iertr = base_iertr;
  using lookup_root_ret = lookup_root_iertr::future<>;
  lookup_root_ret lookup_root(
    op_context_t c,
    iterator &iter,
    mapped_space_visitor_t *visitor) const {
    if (root.get_depth() > 1) {
      return get_internal_node(
        c,
        root.get_depth(),
        root.get_location(),
        0,
        L_ADDR_MAX
      ).si_then([this, visitor, &iter](LBAInternalNodeRef root_node) {
        iter.get_internal(root.get_depth()).node = root_node;
        if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
        return lookup_root_iertr::now();
      });
    } else {
      return get_leaf_node(
        c,
        root.get_location(),
        0,
        L_ADDR_MAX
      ).si_then([visitor, &iter](LBALeafNodeRef root_node) {
        iter.leaf.node = root_node;
        if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
        return lookup_root_iertr::now();
      });
    }
  }

  using lookup_internal_level_iertr = base_iertr;
  using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
  template <typename F>
  static lookup_internal_level_ret lookup_internal_level(
    op_context_t c,
    depth_t depth,
    iterator &iter,
    F &f,
    mapped_space_visitor_t *visitor
  ) {
    assert(depth > 1);
    auto &parent_entry = iter.get_internal(depth + 1);
    auto parent = parent_entry.node;
    auto node_iter = parent->iter_idx(parent_entry.pos);
    auto next_iter = node_iter + 1;
    auto begin = node_iter->get_key();
    auto end = next_iter == parent->end()
      ? parent->get_node_meta().end
      : next_iter->get_key();
    return get_internal_node(
      c,
      depth,
      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
      begin,
      end
    ).si_then([depth, visitor, &iter, &f](LBAInternalNodeRef node) {
      auto &entry = iter.get_internal(depth);
      entry.node = node;
      auto node_iter = f(*node);
      assert(node_iter != node->end());
      entry.pos = node_iter->get_offset();
      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
      return seastar::now();
    });
  }

  using lookup_leaf_iertr = base_iertr;
  using lookup_leaf_ret = lookup_leaf_iertr::future<>;
  template <typename F>
  static lookup_internal_level_ret lookup_leaf(
    op_context_t c,
    iterator &iter,
    F &f,
    mapped_space_visitor_t *visitor
  ) {
    auto &parent_entry = iter.get_internal(2);
    auto parent = parent_entry.node;
    assert(parent);
    auto node_iter = parent->iter_idx(parent_entry.pos);
    auto next_iter = node_iter + 1;
    auto begin = node_iter->get_key();
    auto end = next_iter == parent->end()
      ? parent->get_node_meta().end
      : next_iter->get_key();

    return get_leaf_node(
      c,
      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
      begin,
      end
    ).si_then([visitor, &iter, &f](LBALeafNodeRef node) {
      iter.leaf.node = node;
      auto node_iter = f(*node);
      iter.leaf.pos = node_iter->get_offset();
      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
      return seastar::now();
    });
  }

  /**
   * lookup_depth_range
   *
   * Performs node lookups on depths [from, to) using li and ll to
   * specify the target at each level. Note, this may leave the iterator
   * at_boundary(); call handle_boundary() prior to returning out
   * of LBABtree.
   */
  using lookup_depth_range_iertr = base_iertr;
  using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
  template <typename LI, typename LL>
  static lookup_depth_range_ret lookup_depth_range(
    op_context_t c, ///< [in] context
    iterator &iter, ///< [in,out] iterator to populate
    depth_t from, ///< [in] from inclusive
    depth_t to, ///< [in] to exclusive, (to <= from, to == from is a noop)
    LI &li, ///< [in] internal->iterator
    LL &ll, ///< [in] leaf->iterator
    mapped_space_visitor_t *visitor ///< [in] mapped space visitor
  ) {
    LOG_PREFIX(LBATree::lookup_depth_range);
    SUBDEBUGT(seastore_lba_details, "{} -> {}", c.trans, from, to);
    return seastar::do_with(
      from,
      [c, to, visitor, &iter, &li, &ll](auto &d) {
        return trans_intr::repeat(
          [c, to, visitor, &iter, &li, &ll, &d] {
            if (d > to) {
              return [&] {
                if (d > 1) {
                  return lookup_internal_level(
                    c,
                    d,
                    iter,
                    li,
                    visitor);
                } else {
                  assert(d == 1);
                  return lookup_leaf(
                    c,
                    iter,
                    ll,
                    visitor);
                }
              }().si_then([&d] {
                --d;
                return lookup_depth_range_iertr::make_ready_future<
                  seastar::stop_iteration
                >(seastar::stop_iteration::no);
              });
            } else {
              return lookup_depth_range_iertr::make_ready_future<
                seastar::stop_iteration
              >(seastar::stop_iteration::yes);
            }
          });
      });
  }

  using lookup_iertr = base_iertr;
  using lookup_ret = lookup_iertr::future<iterator>;
  template <typename LI, typename LL>
  lookup_ret lookup(
    op_context_t c,
    LI &&lookup_internal,
    LL &&lookup_leaf,
    mapped_space_visitor_t *visitor
  ) const {
    LOG_PREFIX(LBATree::lookup);
    return seastar::do_with(
      iterator{root.get_depth()},
      std::forward<LI>(lookup_internal),
      std::forward<LL>(lookup_leaf),
      [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
        return lookup_root(
          c, iter, visitor
        ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
          if (iter.get_depth() > 1) {
            auto &root_entry = *(iter.internal.rbegin());
            root_entry.pos = li(*(root_entry.node)).get_offset();
          } else {
            auto &root_entry = iter.leaf;
            auto riter = ll(*(root_entry.node));
            root_entry.pos = riter->get_offset();
          }
          SUBDEBUGT(seastore_lba_details, "got root, depth {}", c.trans, root.get_depth());
          return lookup_depth_range(
            c,
            iter,
            root.get_depth() - 1,
            0,
            li,
            ll,
            visitor
          ).si_then([c, visitor, &iter] {
            if (iter.at_boundary()) {
              return iter.handle_boundary(c, visitor);
            } else {
              return lookup_iertr::now();
            }
          });
        }).si_then([&iter] {
          return std::move(iter);
        });
      });
  }

  /**
   * find_insertion
   *
   * Prepare iter for insertion. iter should begin pointing at
   * the valid insertion point (lower_bound(laddr)).
   *
   * Upon completion, iter will point at the
   * position at which laddr should be inserted. iter may, upon completion,
   * point at the end of a leaf other than the end leaf if that's the correct
   * insertion point.
   */
  using find_insertion_iertr = base_iertr;
  using find_insertion_ret = find_insertion_iertr::future<>;
  static find_insertion_ret find_insertion(
    op_context_t c,
    laddr_t laddr,
    iterator &iter);

  /**
   * handle_split
   *
   * Split nodes in iter as needed for insertion. First, scan iter from leaf
   * to find the first non-full level. Then, split from there towards the leaf.
   *
   * Upon completion, iter will point at the newly split insertion point. As
   * with find_insertion, iter's leaf pointer may be end without iter being
   * end.
   */
  using handle_split_iertr = base_iertr;
  using handle_split_ret = handle_split_iertr::future<>;
  handle_split_ret handle_split(
    op_context_t c,
    iterator &iter);

  using handle_merge_iertr = base_iertr;
  using handle_merge_ret = handle_merge_iertr::future<>;
  handle_merge_ret handle_merge(
    op_context_t c,
    iterator &iter);

  using update_internal_mapping_iertr = base_iertr;
  using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
  update_internal_mapping_ret update_internal_mapping(
    op_context_t c,
    depth_t depth,
    laddr_t laddr,
    paddr_t old_addr,
    paddr_t new_addr);

  template <typename T>
  using node_position_t = iterator::node_position_t<T>;

  template <typename NodeType>
  friend base_iertr::future<typename NodeType::Ref> get_node(
    op_context_t c,
    depth_t depth,
    paddr_t addr,
    laddr_t begin,
    laddr_t end);

  template <typename NodeType>
  friend handle_merge_ret merge_level(
    op_context_t c,
    depth_t depth,
    node_position_t<LBAInternalNode> &parent_pos,
    node_position_t<NodeType> &pos);
};

}
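
For orientation, a hedged sketch of how the iterate_repeat helper in this deleted header (generalized into crimson/os/seastore/btree/fixed_kv_btree.h by this PR) is driven. Only the interface declared above is assumed; the function, counter, and range are invented for the example:

base_iertr::future<> count_mappings(
    op_context_t c,
    LBABtree &tree,
    size_t &count) {
  return LBABtree::iterate_repeat(
    c,
    tree.lower_bound(c, 0),
    [&count](auto &pos) {
      if (pos.is_end()) {
        // stop once the iterator reaches the end of the tree;
        // iterate_repeat itself advances pos between invocations
        return LBABtree::iterate_repeat_ret_inner(
          interruptible::ready_future_marker{},
          seastar::stop_iteration::yes);
      }
      ++count;
      return LBABtree::iterate_repeat_ret_inner(
        interruptible::ready_future_marker{},
        seastar::stop_iteration::no);
    });
}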

@ -27,24 +27,6 @@ std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v)
     << ")";
}

std::ostream &LBAInternalNode::print_detail(std::ostream &out) const
{
  return out << ", size=" << get_size()
             << ", meta=" << get_meta();
}

void LBAInternalNode::resolve_relative_addrs(paddr_t base)
{
  LOG_PREFIX(LBAInternalNode::resolve_relative_addrs);
  for (auto i: *this) {
    if (i->get_val().is_relative()) {
      auto updated = base.add_relative(i->get_val());
      TRACE("{} -> {}", i->get_val(), updated);
      i->set_val(updated);
    }
  }
}

std::ostream &LBALeafNode::print_detail(std::ostream &out) const
{
  return out << ", size=" << get_size()
@ -16,18 +16,15 @@
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"

#include "crimson/os/seastore/btree/btree_range_pin.h"
#include "crimson/os/seastore/btree/fixed_kv_btree.h"
#include "crimson/os/seastore/btree/fixed_kv_node.h"

namespace crimson::os::seastore::lba_manager::btree {

using base_iertr = LBAManager::base_iertr;

struct op_context_t {
  Cache &cache;
  Transaction &trans;
  btree_pin_set_t *pins = nullptr;
};
using LBANode = FixedKVNode<laddr_t>;

/**
 * lba_map_val_t

@ -57,71 +54,11 @@ WRITE_EQ_OPERATORS_4(

std::ostream& operator<<(std::ostream& out, const lba_map_val_t&);

class BtreeLBAPin;
using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;

constexpr size_t LBA_BLOCK_SIZE = 4096;

/**
 * lba_node_meta_le_t
 *
 * On disk layout for lba_node_meta_t
 */
struct lba_node_meta_le_t {
  laddr_le_t begin = laddr_le_t(0);
  laddr_le_t end = laddr_le_t(0);
  depth_le_t depth = init_depth_le(0);
using lba_node_meta_t = fixed_kv_node_meta_t<laddr_t>;

  lba_node_meta_le_t() = default;
  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
    : begin(ceph_le64(val.begin)),
      end(ceph_le64(val.end)),
      depth(init_depth_le(val.depth)) {}

  operator lba_node_meta_t() const {
    return lba_node_meta_t{ begin, end, depth };
  }
};

/**
 * LBANode
 *
 * Base class enabling recursive lookup between internal and leaf nodes.
 */
struct LBANode : CachedExtent {
  using LBANodeRef = TCachedExtentRef<LBANode>;

  btree_range_pin_t pin;

  LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
  LBANode(const LBANode &rhs)
    : CachedExtent(rhs), pin(rhs.pin, this) {}

  virtual lba_node_meta_t get_node_meta() const = 0;

  virtual ~LBANode() = default;

  void on_delta_write(paddr_t record_block_offset) final {
    // All in-memory relative addrs are necessarily record-relative
    assert(get_prior_instance());
    pin.take_pin(get_prior_instance()->cast<LBANode>()->pin);
    resolve_relative_addrs(record_block_offset);
  }

  void on_initial_write() final {
    // All in-memory relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  void on_clean_read() final {
    // From initial write of block, relative addrs are necessarily block-relative
    resolve_relative_addrs(get_paddr());
  }

  virtual void resolve_relative_addrs(paddr_t base) = 0;
};
using LBANodeRef = LBANode::LBANodeRef;
using lba_node_meta_le_t = fixed_kv_node_meta_le_t<laddr_le_t>;
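
The alias above instantiates the generic on-disk meta layout for the LBA tree; it works because this patch also adds orig_type to laddr_le_t (see the seastore_types.h hunk further below), letting the template recover the in-memory key type from its little-endian counterpart. A sketch of the relationship, illustrative rather than code from the patch:

static_assert(
  std::is_same_v<laddr_le_t::orig_type, laddr_t>,
  "orig_type ties the LE key layout back to its in-memory bound type");
using lba_meta_le = fixed_kv_node_meta_le_t<laddr_le_t>;  // same as the alias above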

/**
 * LBAInternalNode

@ -142,197 +79,22 @@ using LBANodeRef = LBANode::LBANodeRef;
 */
constexpr size_t INTERNAL_NODE_CAPACITY = 254;
struct LBAInternalNode
  : LBANode,
    common::FixedKVNodeLayout<
  : FixedKVInternalNode<
      INTERNAL_NODE_CAPACITY,
      lba_node_meta_t, lba_node_meta_le_t,
      laddr_t, laddr_le_t,
      paddr_t, paddr_le_t> {
      LBA_BLOCK_SIZE,
      LBAInternalNode> {
  using Ref = TCachedExtentRef<LBAInternalNode>;
  using internal_iterator_t = const_iterator;
  template <typename... T>
  LBAInternalNode(T&&... t) :
    LBANode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}
    FixedKVInternalNode(std::forward<T>(t)...) {}

  static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;

  lba_node_meta_t get_node_meta() const { return get_meta(); }

  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new LBAInternalNode(*this));
  };

  delta_buffer_t delta_buffer;
  delta_buffer_t *maybe_get_delta_buffer() {
    return is_mutation_pending() ? &delta_buffer : nullptr;
  }

  void update(
    const_iterator iter,
    paddr_t addr) {
    return journal_update(
      iter,
      maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  void insert(
    const_iterator iter,
    laddr_t pivot,
    paddr_t addr) {
    return journal_insert(
      iter,
      pivot,
      maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  void remove(const_iterator iter) {
    return journal_remove(
      iter,
      maybe_get_delta_buffer());
  }

  void replace(
    const_iterator iter,
    laddr_t pivot,
    paddr_t addr) {
    return journal_replace(
      iter,
      pivot,
      maybe_generate_relative(addr),
      maybe_get_delta_buffer());
  }

  std::tuple<Ref, Ref, laddr_t>
  make_split_children(op_context_t c) {
    auto left = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto right = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto pivot = split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  Ref make_full_merge(
    op_context_t c,
    Ref &right) {
    auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    replacement->merge_from(*this, *right->cast<LBAInternalNode>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<Ref, Ref, laddr_t>
  make_balanced(
    op_context_t c,
    Ref &_right,
    bool prefer_left) {
    ceph_assert(_right->get_type() == get_type());
    auto &right = *_right->cast<LBAInternalNode>();
    auto replacement_left = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto replacement_right = c.cache.alloc_new_extent<LBAInternalNode>(
      c.trans, LBA_BLOCK_SIZE);

    auto pivot = balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  /**
   * Internal relative addresses on read or in memory prior to commit
   * are either record or block relative depending on whether this
   * physical node is is_initial_pending() or just is_pending().
   *
   * User passes appropriate base depending on lifecycle and
   * resolve_relative_addrs fixes up relative internal references
   * based on base.
   */
  void resolve_relative_addrs(paddr_t base);
  void node_resolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_block_relative());
          i->set_val(get_paddr().add_relative(i->get_val()));
        }
      }
    }
  }
  void node_unresolve_vals(iterator from, iterator to) const final {
    if (is_initial_pending()) {
      for (auto i = from; i != to; ++i) {
        if (i->get_val().is_relative()) {
          assert(i->get_val().is_record_relative());
          i->set_val(i->get_val() - get_paddr());
        }
      }
    }
  }

  extent_types_t get_type() const final {
    return TYPE;
  }

  std::ostream &print_detail(std::ostream &out) const final;

  ceph::bufferlist get_delta() final {
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    set_last_committed_crc(get_crc32c());
    resolve_relative_addrs(base);
  }

  constexpr static size_t get_min_capacity() {
    return (get_capacity() - 1) / 2;
  }

  bool at_max_capacity() const {
    assert(get_size() <= get_capacity());
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const {
    assert(get_size() >= (get_min_capacity() - 1));
    return get_size() <= get_min_capacity();
  }

  bool below_min_capacity() const {
    assert(get_size() >= (get_min_capacity() - 1));
    return get_size() < get_min_capacity();
  }
};
using LBAInternalNodeRef = LBAInternalNode::Ref;
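
The hunk above replaces the old LBANode base plus FixedKVNodeLayout mixin with the extracted FixedKVInternalNode template, which takes the concrete node type as its final parameter. A schematic of that CRTP shape, simplified and omitting the key/value layout parameters shown above:

template <typename ConcreteNode>
struct fixed_kv_internal_node_sketch {
  // shared helpers can recover the concrete extent type without
  // virtual dispatch, e.g. when allocating split/merge replacements
  ConcreteNode &self() { return static_cast<ConcreteNode&>(*this); }
};

struct example_node : fixed_kv_internal_node_sketch<example_node> {};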

@ -380,36 +142,23 @@ struct lba_map_val_le_t {
};

struct LBALeafNode
  : LBANode,
    common::FixedKVNodeLayout<
  : FixedKVLeafNode<
      LEAF_NODE_CAPACITY,
      lba_node_meta_t, lba_node_meta_le_t,
      laddr_t, laddr_le_t,
      lba_map_val_t, lba_map_val_le_t> {
      lba_map_val_t, lba_map_val_le_t,
      LBA_BLOCK_SIZE,
      LBALeafNode> {
  using Ref = TCachedExtentRef<LBALeafNode>;
  using internal_iterator_t = const_iterator;
  template <typename... T>
  LBALeafNode(T&&... t) :
    LBANode(std::forward<T>(t)...),
    FixedKVNodeLayout(get_bptr().c_str()) {}
    FixedKVLeafNode(std::forward<T>(t)...) {}

  static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;

  lba_node_meta_t get_node_meta() const { return get_meta(); }

  CachedExtentRef duplicate_for_write() final {
    assert(delta_buffer.empty());
    return CachedExtentRef(new LBALeafNode(*this));
  };

  delta_buffer_t delta_buffer;
  delta_buffer_t *maybe_get_delta_buffer() {
    return is_mutation_pending() ? &delta_buffer : nullptr;
  }

  void update(
    const_iterator iter,
    lba_map_val_t val) {
    lba_map_val_t val) final {
    val.paddr = maybe_generate_relative(val.paddr);
    return journal_update(
      iter,

@ -417,10 +166,10 @@ struct LBALeafNode
      maybe_get_delta_buffer());
  }

  auto insert(
  const_iterator insert(
    const_iterator iter,
    laddr_t addr,
    lba_map_val_t val) {
    lba_map_val_t val) final {
    val.paddr = maybe_generate_relative(val.paddr);
    journal_insert(
      iter,

@ -430,65 +179,12 @@ struct LBALeafNode
    return iter;
  }

  void remove(const_iterator iter) {
  void remove(const_iterator iter) final {
    return journal_remove(
      iter,
      maybe_get_delta_buffer());
  }


  std::tuple<Ref, Ref, laddr_t>
  make_split_children(op_context_t c) {
    auto left = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto right = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto pivot = split_into(*left, *right);
    left->pin.set_range(left->get_meta());
    right->pin.set_range(right->get_meta());
    return std::make_tuple(
      left,
      right,
      pivot);
  }

  Ref make_full_merge(
    op_context_t c,
    Ref &right) {
    auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    replacement->merge_from(*this, *right->cast<LBALeafNode>());
    replacement->pin.set_range(replacement->get_meta());
    return replacement;
  }

  std::tuple<Ref, Ref, laddr_t>
  make_balanced(
    op_context_t c,
    Ref &_right,
    bool prefer_left) {
    ceph_assert(_right->get_type() == get_type());
    auto &right = *_right->cast<LBALeafNode>();
    auto replacement_left = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);
    auto replacement_right = c.cache.alloc_new_extent<LBALeafNode>(
      c.trans, LBA_BLOCK_SIZE);

    auto pivot = balance_into_new_nodes(
      *this,
      right,
      prefer_left,
      *replacement_left,
      *replacement_right);

    replacement_left->pin.set_range(replacement_left->get_meta());
    replacement_right->pin.set_range(replacement_right->get_meta());
    return std::make_tuple(
      replacement_left,
      replacement_right,
      pivot);
  }

  // See LBAInternalNode, same concept
  void resolve_relative_addrs(paddr_t base);
  void node_resolve_vals(iterator from, iterator to) const final {

@ -517,50 +213,11 @@ struct LBALeafNode
      }
    }

  ceph::bufferlist get_delta() final {
    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
    delta_buffer.copy_out(bptr.c_str(), bptr.length());
    ceph::bufferlist bl;
    bl.push_back(bptr);
    return bl;
  }

  void apply_delta_and_adjust_crc(
    paddr_t base, const ceph::bufferlist &_bl) final {
    assert(_bl.length());
    ceph::bufferlist bl = _bl;
    bl.rebuild();
    delta_buffer_t buffer;
    buffer.copy_in(bl.front().c_str(), bl.front().length());
    buffer.replay(*this);
    set_last_committed_crc(get_crc32c());
    resolve_relative_addrs(base);
  }

  extent_types_t get_type() const final {
    return TYPE;
  }

  std::ostream &print_detail(std::ostream &out) const final;

  constexpr static size_t get_min_capacity() {
    return (get_capacity() - 1) / 2;
  }

  bool at_max_capacity() const {
    assert(get_size() <= get_capacity());
    return get_size() == get_capacity();
  }

  bool at_min_capacity() const {
    assert(get_size() >= (get_min_capacity() - 1));
    return get_size() <= get_min_capacity();
  }

  bool below_min_capacity() const {
    assert(get_size() >= (get_min_capacity() - 1));
    return get_size() < get_min_capacity();
  }
};
using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
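
The capacity predicates retained above are what the btree's check_split/check_merge consult. As a worked model of the thresholds, using INTERNAL_NODE_CAPACITY = 254 from this header (helper names are invented for illustration):

constexpr size_t capacity = 254;                    // INTERNAL_NODE_CAPACITY
constexpr size_t min_capacity = (capacity - 1) / 2; // == get_min_capacity()
static_assert(min_capacity == 126);
// a node splits when completely full, and merges or rebalances with a
// sibling once it drops below the minimum occupancy
constexpr bool should_split(size_t size) { return size == capacity; }
constexpr bool should_merge(size_t size) { return size < min_capacity; }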

@ -69,10 +69,10 @@ ObjectDataHandler::write_ret do_removals(
  LOG_PREFIX(object_data_handler.cc::do_removals);
  DEBUGT("decreasing ref: {}",
    ctx.t,
    pin->get_laddr());
    pin->get_key());
  return ctx.tm.dec_ref(
    ctx.t,
    pin->get_laddr()
    pin->get_key()
  ).si_then(
    [](auto){},
    ObjectDataHandler::write_iertr::pass_further{},

@ -129,14 +129,14 @@ ObjectDataHandler::write_ret do_insertions(
      region.len
    ).si_then([FNAME, ctx, &region](auto pin) {
      ceph_assert(pin->get_length() == region.len);
      if (pin->get_laddr() != region.addr) {
      if (pin->get_key() != region.addr) {
        ERRORT(
          "inconsistent laddr: pin: {} region {}",
          ctx.t,
          pin->get_laddr(),
          pin->get_key(),
          region.addr);
      }
      ceph_assert(pin->get_laddr() == region.addr);
      ceph_assert(pin->get_key() == region.addr);
      return ObjectDataHandler::write_iertr::now();
    });
}
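
The renames in these hunks are mechanical: LBAPin's logical-address accessor get_laddr() becomes the generic get_key() as the pin machinery moves under the fixed-KV abstraction. A sketch of the recurring pattern in the callers below (the helper name is invented):

// Compute a pin's logical range from the renamed accessors.
std::pair<laddr_t, laddr_t> pin_range(const LBAPin &pin) {
  const auto begin = pin.get_key();
  return {begin, begin + pin.get_length()};
}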

@ -156,7 +156,7 @@ using split_ret_bare = std::pair<
using split_ret = get_iertr::future<split_ret_bare>;
split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
{
  const auto pin_offset = pin->get_laddr();
  const auto pin_offset = pin->get_key();
  assert_aligned(pin_offset);
  ceph_assert(offset >= pin_offset);
  if (offset == pin_offset) {

@ -181,7 +181,7 @@ split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
    );
  } else {
    // Data, return up to offset to prepend
    auto to_prepend = offset - pin->get_laddr();
    auto to_prepend = offset - pin->get_key();
    return read_pin(ctx, pin->duplicate()
    ).si_then([to_prepend](auto extent) {
      return get_iertr::make_ready_future<split_ret_bare>(

@ -194,8 +194,8 @@ split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
/// Reverse of split_pin_left
split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end)
{
  const auto pin_begin = pin->get_laddr();
  const auto pin_end = pin->get_laddr() + pin->get_length();
  const auto pin_begin = pin->get_key();
  const auto pin_end = pin->get_key() + pin->get_length();
  assert_aligned(pin_end);
  ceph_assert(pin_end >= end);
  if (end == pin_end) {

@ -273,7 +273,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
  ).si_then([max_object_size=max_object_size, &object_data](auto pin) {
    ceph_assert(pin->get_length() == max_object_size);
    object_data.update_reserved(
      pin->get_laddr(),
      pin->get_key(),
      pin->get_length());
    return write_iertr::now();
  });

@ -302,17 +302,17 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
  _pins.swap(pins);
  ceph_assert(pins.size());
  auto &pin = *pins.front();
  ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base());
  ceph_assert(pin.get_key() >= object_data.get_reserved_data_base());
  ceph_assert(
    pin.get_laddr() <= object_data.get_reserved_data_base() + size);
  auto pin_offset = pin.get_laddr() -
    pin.get_key() <= object_data.get_reserved_data_base() + size);
  auto pin_offset = pin.get_key() -
    object_data.get_reserved_data_base();
  if ((pin.get_laddr() == (object_data.get_reserved_data_base() + size)) ||
  if ((pin.get_key() == (object_data.get_reserved_data_base() + size)) ||
      (pin.get_paddr().is_zero())) {
    /* First pin is exactly at the boundary or is a zero pin. Either way,
     * remove all pins and add a single zero pin to the end. */
    to_write.emplace_back(
      pin.get_laddr(),
      pin.get_key(),
      object_data.get_reserved_data_len() - pin_offset);
    return clear_iertr::now();
  } else {

@ -332,7 +332,7 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
    ));
    bl.append_zero(p2roundup(size, ctx.tm.get_block_size()) - size);
    to_write.emplace_back(
      pin.get_laddr(),
      pin.get_key(),
      bl);
    to_write.emplace_back(
      object_data.get_reserved_data_base() +

@ -387,9 +387,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
    offset,
    bl.length());
  ceph_assert(pins.size() >= 1);
  auto pin_begin = pins.front()->get_laddr();
  auto pin_begin = pins.front()->get_key();
  ceph_assert(pin_begin <= offset);
  auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
  auto pin_end = pins.back()->get_key() + pins.back()->get_length();
  ceph_assert(pin_end >= (offset + bl.length()));

  return split_pin_left(

@ -500,7 +500,7 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
  ).si_then([ctx, loffset, len, &ret](auto _pins) {
    // offset~len falls within reserved region and len > 0
    ceph_assert(_pins.size() >= 1);
    ceph_assert((*_pins.begin())->get_laddr() <= loffset);
    ceph_assert((*_pins.begin())->get_key() <= loffset);
    return seastar::do_with(
      std::move(_pins),
      loffset,

@ -511,9 +511,9 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
      -> read_iertr::future<> {
      ceph_assert(current <= (loffset + len));
      ceph_assert(
        (loffset + len) > pin->get_laddr());
        (loffset + len) > pin->get_key());
      laddr_t end = std::min(
        pin->get_laddr() + pin->get_length(),
        pin->get_key() + pin->get_length(),
        loffset + len);
      if (pin->get_paddr().is_zero()) {
        ceph_assert(end > current); // See LBAManager::get_mappings

@ -583,12 +583,12 @@ ObjectDataHandler::fiemap_ret ObjectDataHandler::fiemap(
    len
  ).si_then([loffset, len, &object_data, &ret](auto &&pins) {
    ceph_assert(pins.size() >= 1);
    ceph_assert((*pins.begin())->get_laddr() <= loffset);
    ceph_assert((*pins.begin())->get_key() <= loffset);
    for (auto &&i: pins) {
      if (!(i->get_paddr().is_zero())) {
        auto ret_left = std::max(i->get_laddr(), loffset);
        auto ret_left = std::max(i->get_key(), loffset);
        auto ret_right = std::min(
          i->get_laddr() + i->get_length(),
          i->get_key() + i->get_length(),
          loffset + len);
        assert(ret_right > ret_left);
        ret.emplace(

@ -712,6 +712,8 @@ struct __attribute((packed)) paddr_le_t {
  ceph_le64 dev_addr =
    ceph_le64(P_ADDR_NULL.dev_addr);

  using orig_type = paddr_t;

  paddr_le_t() = default;
  paddr_le_t(const paddr_t &addr) : dev_addr(ceph_le64(addr.dev_addr)) {}

@ -800,6 +802,8 @@ constexpr laddr_t L_ADDR_LBAT = L_ADDR_MAX - 2;
struct __attribute((packed)) laddr_le_t {
  ceph_le64 laddr = ceph_le64(L_ADDR_NULL);

  using orig_type = laddr_t;

  laddr_le_t() = default;
  laddr_le_t(const laddr_le_t &) = default;
  explicit laddr_le_t(const laddr_t &addr)

@ -1081,22 +1085,22 @@ public:
};

/**
 * lba_root_t
 * phy_tree_root_t
 */
class __attribute__((packed)) lba_root_t {
class __attribute__((packed)) phy_tree_root_t {
  paddr_le_t root_addr;
  depth_le_t depth = init_extent_len_le(0);

public:
  lba_root_t() = default;
  phy_tree_root_t() = default;

  lba_root_t(paddr_t addr, depth_t depth)
  phy_tree_root_t(paddr_t addr, depth_t depth)
    : root_addr(addr), depth(init_depth_le(depth)) {}

  lba_root_t(const lba_root_t &o) = default;
  lba_root_t(lba_root_t &&o) = default;
  lba_root_t &operator=(const lba_root_t &o) = default;
  lba_root_t &operator=(lba_root_t &&o) = default;
  phy_tree_root_t(const phy_tree_root_t &o) = default;
  phy_tree_root_t(phy_tree_root_t &&o) = default;
  phy_tree_root_t &operator=(const phy_tree_root_t &o) = default;
  phy_tree_root_t &operator=(phy_tree_root_t &&o) = default;

  paddr_t get_location() const {
    return root_addr;

@ -1188,6 +1192,7 @@ public:
  }
};

using lba_root_t = phy_tree_root_t;
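
The trailing alias keeps every existing lba_root_t call site compiling while the root type itself becomes tree-agnostic. A hedged illustration of the source compatibility it buys (the function is hypothetical):

static_assert(std::is_same_v<lba_root_t, phy_tree_root_t>);

lba_root_t make_lba_root(paddr_t addr, depth_t depth) {
  // the old spelling and the new spelling now name the same type
  return phy_tree_root_t(addr, depth);
}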

/**
 * root_t

@ -481,14 +481,14 @@ TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_liv
  return lba_manager->get_mapping(
    t,
    laddr).si_then([=, &t] (LBAPinRef pin) -> inner_ret {
      ceph_assert(pin->get_laddr() == laddr);
      ceph_assert(pin->get_key() == laddr);
      if (pin->get_paddr() == addr) {
        if (pin->get_length() != (extent_len_t)len) {
          ERRORT(
            "Invalid pin {}~{} {} found for "
            "extent {} {}~{} {}",
            t,
            pin->get_laddr(),
            pin->get_key(),
            pin->get_length(),
            pin->get_paddr(),
            type,

@ -141,7 +141,7 @@ struct lba_btree_test : btree_test_base {
  std::map<laddr_t, lba_map_val_t> check;

  auto get_op_context(Transaction &t) {
    return op_context_t{*cache, t};
    return op_context_t<laddr_t>{*cache, t};
  }

  LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {

@ -376,11 +376,11 @@ struct btree_lba_manager_test : btree_test_base {
    }).unsafe_get0();
    logger().debug("alloc'd: {}", *ret);
    EXPECT_EQ(len, ret->get_length());
    auto [b, e] = get_overlap(t, ret->get_laddr(), len);
    auto [b, e] = get_overlap(t, ret->get_key(), len);
    EXPECT_EQ(b, e);
    t.mappings.emplace(
      std::make_pair(
        ret->get_laddr(),
        ret->get_key(),
        test_extent_t{
          ret->get_paddr(),
          ret->get_length(),

@ -474,7 +474,7 @@ struct btree_lba_manager_test : btree_test_base {
    EXPECT_EQ(ret_list.size(), 1);
    auto &ret = *ret_list.begin();
    EXPECT_EQ(i.second.addr, ret->get_paddr());
    EXPECT_EQ(laddr, ret->get_laddr());
    EXPECT_EQ(laddr, ret->get_key());
    EXPECT_EQ(len, ret->get_length());

    auto ret_pin = with_trans_intr(

@ -484,7 +484,7 @@ struct btree_lba_manager_test : btree_test_base {
      t, laddr);
    }).unsafe_get0();
    EXPECT_EQ(i.second.addr, ret_pin->get_paddr());
    EXPECT_EQ(laddr, ret_pin->get_laddr());
    EXPECT_EQ(laddr, ret_pin->get_key());
    EXPECT_EQ(len, ret_pin->get_length());
  }
  with_trans_intr(

@ -554,8 +554,8 @@ TEST_F(btree_lba_manager_test, force_split_merge)
      check_mappings(t);
      check_mappings();
    }
    incref_mapping(t, ret->get_laddr());
    decref_mapping(t, ret->get_laddr());
    incref_mapping(t, ret->get_key());
    decref_mapping(t, ret->get_key());
  }
  logger().debug("submitting transaction");
  submit_test_transaction(std::move(t));