crimson/os/seastore: allow EPM to make decisions on the general extent allocation path

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
This commit is contained in:
Yingxin Cheng 2022-01-27 15:00:44 +08:00
parent d483557c56
commit 32da0e0ecb
8 changed files with 94 additions and 93 deletions

View File

@ -855,37 +855,37 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
Transaction &t, ///< [in, out] current transaction
extent_types_t type, ///< [in] type tag
seastore_off_t length, ///< [in] length
bool delay ///< [in] whether to delay paddr alloc
placement_hint_t hint
)
{
LOG_PREFIX(Cache::alloc_new_extent_by_type);
SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
t, type, length, delay);
SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}",
t, type, length, hint);
switch (type) {
case extent_types_t::ROOT:
ceph_assert(0 == "ROOT is never directly alloc'd");
return CachedExtentRef();
case extent_types_t::LADDR_INTERNAL:
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, delay);
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint);
case extent_types_t::LADDR_LEAF:
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, delay);
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint);
case extent_types_t::ONODE_BLOCK_STAGED:
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, delay);
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint);
case extent_types_t::OMAP_INNER:
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, delay);
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint);
case extent_types_t::OMAP_LEAF:
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, delay);
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint);
case extent_types_t::COLL_BLOCK:
return alloc_new_extent<collection_manager::CollectionNode>(t, length, delay);
return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint);
case extent_types_t::OBJECT_DATA_BLOCK:
return alloc_new_extent<ObjectDataBlock>(t, length, delay);
return alloc_new_extent<ObjectDataBlock>(t, length, hint);
case extent_types_t::RETIRED_PLACEHOLDER:
ceph_assert(0 == "impossible");
return CachedExtentRef();
case extent_types_t::TEST_BLOCK:
return alloc_new_extent<TestBlock>(t, length, delay);
return alloc_new_extent<TestBlock>(t, length, hint);
case extent_types_t::TEST_BLOCK_PHYSICAL:
return alloc_new_extent<TestBlockPhysical>(t, length, delay);
return alloc_new_extent<TestBlockPhysical>(t, length, hint);
case extent_types_t::NONE: {
ceph_assert(0 == "NONE is an invalid extent type");
return CachedExtentRef();

View File

@ -9,15 +9,16 @@
#include "include/buffer.h"
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/transaction.h"
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/common/errorator.h"
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/root_block.h"
#include "crimson/os/seastore/segment_cleaner.h"
#include "crimson/os/seastore/extent_placement_manager.h"
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/random_block_manager.h"
#include "crimson/os/seastore/root_block.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/segment_cleaner.h"
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/os/seastore/transaction.h"
namespace crimson::os::seastore {
@ -102,6 +103,10 @@ public:
Cache(ExtentReader &reader);
~Cache();
/// Registers the ExtentPlacementManager used by this Cache.
///
/// Only the address of `epm` is stored (in `p_epm`); no copy is made.
/// `alloc_new_extent()` dereferences `p_epm` without a null check, so this
/// must be called before any extent is allocated through the Cache.
/// NOTE(review): presumably `epm` must outlive this Cache -- confirm with
/// the owner that constructs both objects.
void set_epm(ExtentPlacementManager& epm) {
p_epm = &epm;
}
/// Creates empty transaction by source
TransactionRef create_transaction(
Transaction::src_t src,
@ -489,17 +494,20 @@ public:
*/
template <typename T>
TCachedExtentRef<T> alloc_new_extent(
Transaction &t, ///< [in, out] current transaction
seastore_off_t length, ///< [in] length
bool delayed = false ///< [in] whether the paddr allocation of extent is delayed
Transaction &t, ///< [in, out] current transaction
seastore_off_t length, ///< [in] length
placement_hint_t hint = placement_hint_t::HOT
) {
LOG_PREFIX(Cache::alloc_new_extent);
SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
t, T::TYPE, length, delayed);
auto ret = CachedExtent::make_cached_extent_ref<T>(
alloc_cache_buf(length));
t.add_fresh_extent(ret, delayed);
SUBTRACET(seastore_cache, "allocate {} {}B, hint={}",
t, T::TYPE, length, hint);
auto result = p_epm->alloc_new_extent(t, T::TYPE, length, hint);
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
ret->set_paddr(result.paddr);
t.add_fresh_extent(ret);
ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING;
SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}",
t, T::TYPE, length, result.paddr, hint, *ret);
return ret;
}
@ -512,7 +520,7 @@ public:
Transaction &t, ///< [in, out] current transaction
extent_types_t type, ///< [in] type tag
seastore_off_t length, ///< [in] length
bool delayed = false ///< [in] whether delay addr allocation
placement_hint_t hint = placement_hint_t::HOT
);
/**
@ -732,6 +740,7 @@ public:
private:
ExtentReader &reader; ///< ref to extent reader
ExtentPlacementManager* p_epm = nullptr;
RootBlockRef root; ///< ref to current root
ExtentIndex extents; ///< set of live extents

View File

@ -1,9 +1,11 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
// vim: ts=8 sw=2 smarttab expandtab
#include "crimson/os/seastore/journal.h"
#include "crimson/os/seastore/extent_placement_manager.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/segment_cleaner.h"
namespace {
seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_seastore_tm);
@ -18,13 +20,11 @@ SegmentedAllocator::SegmentedAllocator(
SegmentProvider& sp,
SegmentManager& sm,
LBAManager& lba_manager,
Journal& journal,
Cache& cache)
Journal& journal)
: segment_provider(sp),
segment_manager(sm),
lba_manager(lba_manager),
journal(journal),
cache(cache)
journal(journal)
{
std::generate_n(
std::back_inserter(writers),
@ -35,8 +35,7 @@ SegmentedAllocator::SegmentedAllocator(
segment_provider,
segment_manager,
lba_manager,
journal,
cache};
journal};
});
}

View File

@ -6,10 +6,11 @@
#include "seastar/core/gate.hh"
#include "crimson/common/condition_variable.h"
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/journal.h"
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/os/seastore/transaction.h"
namespace crimson::os::seastore {
@ -162,6 +163,9 @@ struct open_segment_wrapper_t : public boost::intrusive_ref_counter<
using open_segment_wrapper_ref =
boost::intrusive_ptr<open_segment_wrapper_t>;
class LBAManager;
class SegmentProvider;
/**
* SegmentedAllocator
*
@ -182,13 +186,11 @@ class SegmentedAllocator : public ExtentAllocator {
SegmentProvider& sp,
SegmentManager& sm,
LBAManager& lba_manager,
Journal& journal,
Cache& cache)
Journal& journal)
: segment_provider(sp),
segment_manager(sm),
lba_manager(lba_manager),
journal(journal),
cache(cache)
journal(journal)
{}
Writer(Writer &&) = default;
@ -203,8 +205,9 @@ class SegmentedAllocator : public ExtentAllocator {
});
}
private:
using update_lba_mapping_iertr = LBAManager::update_le_mapping_iertr;
using finish_record_iertr = update_lba_mapping_iertr;
using finish_record_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
using finish_record_iertr = trans_iertr<finish_record_ertr>;
using finish_record_ret = finish_record_iertr::future<>;
finish_record_ret finish_write(
Transaction& t,
@ -237,15 +240,13 @@ class SegmentedAllocator : public ExtentAllocator {
crimson::condition_variable segment_rotation_guard;
seastar::gate writer_guard;
bool rolling_segment = false;
Cache& cache;
};
public:
SegmentedAllocator(
SegmentProvider& sp,
SegmentManager& sm,
LBAManager& lba_manager,
Journal& journal,
Cache& cache);
Journal& journal);
Writer &get_writer(placement_hint_t hint) {
return writers[std::rand() % writers.size()];
@ -282,58 +283,50 @@ private:
std::vector<Writer> writers;
LBAManager& lba_manager;
Journal& journal;
Cache& cache;
};
class ExtentPlacementManager {
public:
ExtentPlacementManager(
Cache& cache,
LBAManager& lba_manager
) : cache(cache), lba_manager(lba_manager) {}
) : lba_manager(lba_manager) {}
/**
* alloc_new_extent_by_type
*
* Create a new extent, CachedExtent::poffset may not be set
* if a delayed allocation is needed.
*/
CachedExtentRef alloc_new_extent_by_type(
struct alloc_result_t {
paddr_t paddr;
bufferptr bp;
};
alloc_result_t alloc_new_extent(
Transaction& t,
extent_types_t type,
seastore_off_t length,
placement_hint_t hint) {
// only logical extents should fall in this path
assert(is_logical_type(type));
placement_hint_t hint
) {
assert(hint < placement_hint_t::NUM_HINTS);
auto dtype = get_allocator_type(hint);
// FIXME: set delay for COLD extent when the record overhead is low
bool delay = (hint > placement_hint_t::COLD &&
can_delay_allocation(dtype));
CachedExtentRef extent = cache.alloc_new_extent_by_type(
t, type, length, delay);
extent->hint = hint;
return extent;
}
template<
typename T,
std::enable_if_t<std::is_base_of_v<LogicalCachedExtent, T>, int> = 0>
TCachedExtentRef<T> alloc_new_extent(
Transaction& t,
seastore_off_t length,
placement_hint_t hint) {
// only logical extents should fall in this path
static_assert(is_logical_type(T::TYPE));
assert(hint < placement_hint_t::NUM_HINTS);
auto dtype = get_allocator_type(hint);
// XXX: bp might be extended to point to different memory (e.g. PMem)
// according to the allocator.
auto bp = ceph::bufferptr(
buffer::create_page_aligned(length));
bp.zero();
if (!is_logical_type(type)) {
// TODO: implement out-of-line strategy for physical extent.
return {make_record_relative_paddr(0),
std::move(bp)};
}
// FIXME: set delay for COLD extent when the record overhead is low
// NOTE: delay means to defer the decision about whether to write the
// extent as an inline or an out-of-line extent.
bool delay = (hint > placement_hint_t::COLD &&
can_delay_allocation(dtype));
TCachedExtentRef<T> extent = cache.alloc_new_extent<T>(
t, length, delay);
extent->hint = hint;
return extent;
can_delay_allocation(get_allocator_type(hint)));
if (delay) {
return {make_delayed_temp_paddr(0),
std::move(bp)};
} else {
return {make_record_relative_paddr(0),
std::move(bp)};
}
}
/**
@ -395,7 +388,6 @@ private:
return devices[std::rand() % devices.size()];
}
Cache& cache;
LBAManager& lba_manager;
std::map<device_type_t, std::vector<ExtentAllocatorRef>> allocators;
};

View File

@ -1418,7 +1418,8 @@ seastar::future<std::unique_ptr<SeaStore>> make_seastore(
auto cache = std::make_unique<Cache>(scanner_ref);
auto lba_manager = lba_manager::create_lba_manager(*sm, *cache);
auto epm = std::make_unique<ExtentPlacementManager>(*cache, *lba_manager);
auto epm = std::make_unique<ExtentPlacementManager>(*lba_manager);
cache->set_epm(*epm);
journal->set_segment_provider(&*segment_cleaner);

View File

@ -116,15 +116,16 @@ public:
}
void add_fresh_extent(
CachedExtentRef ref,
bool delayed = false) {
CachedExtentRef ref) {
ceph_assert(!is_weak());
if (delayed) {
if (ref->get_paddr().is_delayed()) {
assert(ref->get_paddr() == make_delayed_temp_paddr(0));
assert(ref->is_logical());
ref->set_paddr(make_delayed_temp_paddr(delayed_temp_offset));
delayed_temp_offset += ref->get_length();
delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>());
} else {
assert(ref->get_paddr() == make_record_relative_paddr(0));
ref->set_paddr(make_record_relative_paddr(offset));
offset += ref->get_length();
inline_block_list.push_back(ref);

View File

@ -381,7 +381,7 @@ TransactionManager::rewrite_logical_extent(
auto lextent = extent->cast<LogicalCachedExtent>();
cache->retire_extent(t, extent);
auto nlextent = epm->alloc_new_extent_by_type(
auto nlextent = cache->alloc_new_extent_by_type(
t,
lextent->get_type(),
lextent->get_length(),

View File

@ -304,7 +304,7 @@ public:
LOG_PREFIX(TransactionManager::alloc_extent);
SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}",
t, T::TYPE, len, placement_hint, laddr_hint);
auto ext = epm->alloc_new_extent<T>(
auto ext = cache->alloc_new_extent<T>(
t,
len,
placement_hint);
@ -551,8 +551,7 @@ public:
*segment_cleaner,
*sm,
*lba_manager,
*journal,
*cache));
*journal));
}
~TransactionManager();