mirror of
https://github.com/ceph/ceph
synced 2025-02-21 09:57:26 +00:00
crimson/os/seastore: allow EPM to make decisions on the general extent allocation path
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
This commit is contained in:
parent
d483557c56
commit
32da0e0ecb
@ -855,37 +855,37 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
seastore_off_t length, ///< [in] length
|
||||
bool delay ///< [in] whether to delay paddr alloc
|
||||
placement_hint_t hint
|
||||
)
|
||||
{
|
||||
LOG_PREFIX(Cache::alloc_new_extent_by_type);
|
||||
SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
|
||||
t, type, length, delay);
|
||||
SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}",
|
||||
t, type, length, hint);
|
||||
switch (type) {
|
||||
case extent_types_t::ROOT:
|
||||
ceph_assert(0 == "ROOT is never directly alloc'd");
|
||||
return CachedExtentRef();
|
||||
case extent_types_t::LADDR_INTERNAL:
|
||||
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, delay);
|
||||
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint);
|
||||
case extent_types_t::LADDR_LEAF:
|
||||
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, delay);
|
||||
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint);
|
||||
case extent_types_t::ONODE_BLOCK_STAGED:
|
||||
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, delay);
|
||||
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint);
|
||||
case extent_types_t::OMAP_INNER:
|
||||
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, delay);
|
||||
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint);
|
||||
case extent_types_t::OMAP_LEAF:
|
||||
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, delay);
|
||||
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint);
|
||||
case extent_types_t::COLL_BLOCK:
|
||||
return alloc_new_extent<collection_manager::CollectionNode>(t, length, delay);
|
||||
return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint);
|
||||
case extent_types_t::OBJECT_DATA_BLOCK:
|
||||
return alloc_new_extent<ObjectDataBlock>(t, length, delay);
|
||||
return alloc_new_extent<ObjectDataBlock>(t, length, hint);
|
||||
case extent_types_t::RETIRED_PLACEHOLDER:
|
||||
ceph_assert(0 == "impossible");
|
||||
return CachedExtentRef();
|
||||
case extent_types_t::TEST_BLOCK:
|
||||
return alloc_new_extent<TestBlock>(t, length, delay);
|
||||
return alloc_new_extent<TestBlock>(t, length, hint);
|
||||
case extent_types_t::TEST_BLOCK_PHYSICAL:
|
||||
return alloc_new_extent<TestBlockPhysical>(t, length, delay);
|
||||
return alloc_new_extent<TestBlockPhysical>(t, length, hint);
|
||||
case extent_types_t::NONE: {
|
||||
ceph_assert(0 == "NONE is an invalid extent type");
|
||||
return CachedExtentRef();
|
||||
|
@ -9,15 +9,16 @@
|
||||
|
||||
#include "include/buffer.h"
|
||||
|
||||
#include "crimson/os/seastore/logging.h"
|
||||
#include "crimson/os/seastore/seastore_types.h"
|
||||
#include "crimson/os/seastore/transaction.h"
|
||||
#include "crimson/os/seastore/segment_manager.h"
|
||||
#include "crimson/common/errorator.h"
|
||||
#include "crimson/os/seastore/cached_extent.h"
|
||||
#include "crimson/os/seastore/root_block.h"
|
||||
#include "crimson/os/seastore/segment_cleaner.h"
|
||||
#include "crimson/os/seastore/extent_placement_manager.h"
|
||||
#include "crimson/os/seastore/logging.h"
|
||||
#include "crimson/os/seastore/random_block_manager.h"
|
||||
#include "crimson/os/seastore/root_block.h"
|
||||
#include "crimson/os/seastore/seastore_types.h"
|
||||
#include "crimson/os/seastore/segment_cleaner.h"
|
||||
#include "crimson/os/seastore/segment_manager.h"
|
||||
#include "crimson/os/seastore/transaction.h"
|
||||
|
||||
namespace crimson::os::seastore {
|
||||
|
||||
@ -102,6 +103,10 @@ public:
|
||||
Cache(ExtentReader &reader);
|
||||
~Cache();
|
||||
|
||||
void set_epm(ExtentPlacementManager& epm) {
|
||||
p_epm = &epm;
|
||||
}
|
||||
|
||||
/// Creates empty transaction by source
|
||||
TransactionRef create_transaction(
|
||||
Transaction::src_t src,
|
||||
@ -489,17 +494,20 @@ public:
|
||||
*/
|
||||
template <typename T>
|
||||
TCachedExtentRef<T> alloc_new_extent(
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
seastore_off_t length, ///< [in] length
|
||||
bool delayed = false ///< [in] whether the paddr allocation of extent is delayed
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
seastore_off_t length, ///< [in] length
|
||||
placement_hint_t hint = placement_hint_t::HOT
|
||||
) {
|
||||
LOG_PREFIX(Cache::alloc_new_extent);
|
||||
SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
|
||||
t, T::TYPE, length, delayed);
|
||||
auto ret = CachedExtent::make_cached_extent_ref<T>(
|
||||
alloc_cache_buf(length));
|
||||
t.add_fresh_extent(ret, delayed);
|
||||
SUBTRACET(seastore_cache, "allocate {} {}B, hint={}",
|
||||
t, T::TYPE, length, hint);
|
||||
auto result = p_epm->alloc_new_extent(t, T::TYPE, length, hint);
|
||||
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
|
||||
ret->set_paddr(result.paddr);
|
||||
t.add_fresh_extent(ret);
|
||||
ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING;
|
||||
SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}",
|
||||
t, T::TYPE, length, result.paddr, hint, *ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -512,7 +520,7 @@ public:
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
seastore_off_t length, ///< [in] length
|
||||
bool delayed = false ///< [in] whether delay addr allocation
|
||||
placement_hint_t hint = placement_hint_t::HOT
|
||||
);
|
||||
|
||||
/**
|
||||
@ -732,6 +740,7 @@ public:
|
||||
|
||||
private:
|
||||
ExtentReader &reader; ///< ref to extent reader
|
||||
ExtentPlacementManager* p_epm = nullptr;
|
||||
RootBlockRef root; ///< ref to current root
|
||||
ExtentIndex extents; ///< set of live extents
|
||||
|
||||
|
@ -1,9 +1,11 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
|
||||
// vim: ts=8 sw=2 smarttab expandtab
|
||||
|
||||
#include "crimson/os/seastore/journal.h"
|
||||
#include "crimson/os/seastore/extent_placement_manager.h"
|
||||
|
||||
#include "crimson/os/seastore/lba_manager.h"
|
||||
#include "crimson/os/seastore/segment_cleaner.h"
|
||||
|
||||
namespace {
|
||||
seastar::logger& logger() {
|
||||
return crimson::get_logger(ceph_subsys_seastore_tm);
|
||||
@ -18,13 +20,11 @@ SegmentedAllocator::SegmentedAllocator(
|
||||
SegmentProvider& sp,
|
||||
SegmentManager& sm,
|
||||
LBAManager& lba_manager,
|
||||
Journal& journal,
|
||||
Cache& cache)
|
||||
Journal& journal)
|
||||
: segment_provider(sp),
|
||||
segment_manager(sm),
|
||||
lba_manager(lba_manager),
|
||||
journal(journal),
|
||||
cache(cache)
|
||||
journal(journal)
|
||||
{
|
||||
std::generate_n(
|
||||
std::back_inserter(writers),
|
||||
@ -35,8 +35,7 @@ SegmentedAllocator::SegmentedAllocator(
|
||||
segment_provider,
|
||||
segment_manager,
|
||||
lba_manager,
|
||||
journal,
|
||||
cache};
|
||||
journal};
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -6,10 +6,11 @@
|
||||
#include "seastar/core/gate.hh"
|
||||
|
||||
#include "crimson/common/condition_variable.h"
|
||||
#include "crimson/os/seastore/logging.h"
|
||||
#include "crimson/os/seastore/cache.h"
|
||||
#include "crimson/os/seastore/cached_extent.h"
|
||||
#include "crimson/os/seastore/lba_manager.h"
|
||||
#include "crimson/os/seastore/journal.h"
|
||||
#include "crimson/os/seastore/logging.h"
|
||||
#include "crimson/os/seastore/segment_manager.h"
|
||||
#include "crimson/os/seastore/transaction.h"
|
||||
|
||||
namespace crimson::os::seastore {
|
||||
|
||||
@ -162,6 +163,9 @@ struct open_segment_wrapper_t : public boost::intrusive_ref_counter<
|
||||
using open_segment_wrapper_ref =
|
||||
boost::intrusive_ptr<open_segment_wrapper_t>;
|
||||
|
||||
class LBAManager;
|
||||
class SegmentProvider;
|
||||
|
||||
/**
|
||||
* SegmentedAllocator
|
||||
*
|
||||
@ -182,13 +186,11 @@ class SegmentedAllocator : public ExtentAllocator {
|
||||
SegmentProvider& sp,
|
||||
SegmentManager& sm,
|
||||
LBAManager& lba_manager,
|
||||
Journal& journal,
|
||||
Cache& cache)
|
||||
Journal& journal)
|
||||
: segment_provider(sp),
|
||||
segment_manager(sm),
|
||||
lba_manager(lba_manager),
|
||||
journal(journal),
|
||||
cache(cache)
|
||||
journal(journal)
|
||||
{}
|
||||
Writer(Writer &&) = default;
|
||||
|
||||
@ -203,8 +205,9 @@ class SegmentedAllocator : public ExtentAllocator {
|
||||
});
|
||||
}
|
||||
private:
|
||||
using update_lba_mapping_iertr = LBAManager::update_le_mapping_iertr;
|
||||
using finish_record_iertr = update_lba_mapping_iertr;
|
||||
using finish_record_ertr = crimson::errorator<
|
||||
crimson::ct_error::input_output_error>;
|
||||
using finish_record_iertr = trans_iertr<finish_record_ertr>;
|
||||
using finish_record_ret = finish_record_iertr::future<>;
|
||||
finish_record_ret finish_write(
|
||||
Transaction& t,
|
||||
@ -237,15 +240,13 @@ class SegmentedAllocator : public ExtentAllocator {
|
||||
crimson::condition_variable segment_rotation_guard;
|
||||
seastar::gate writer_guard;
|
||||
bool rolling_segment = false;
|
||||
Cache& cache;
|
||||
};
|
||||
public:
|
||||
SegmentedAllocator(
|
||||
SegmentProvider& sp,
|
||||
SegmentManager& sm,
|
||||
LBAManager& lba_manager,
|
||||
Journal& journal,
|
||||
Cache& cache);
|
||||
Journal& journal);
|
||||
|
||||
Writer &get_writer(placement_hint_t hint) {
|
||||
return writers[std::rand() % writers.size()];
|
||||
@ -282,58 +283,50 @@ private:
|
||||
std::vector<Writer> writers;
|
||||
LBAManager& lba_manager;
|
||||
Journal& journal;
|
||||
Cache& cache;
|
||||
};
|
||||
|
||||
class ExtentPlacementManager {
|
||||
public:
|
||||
ExtentPlacementManager(
|
||||
Cache& cache,
|
||||
LBAManager& lba_manager
|
||||
) : cache(cache), lba_manager(lba_manager) {}
|
||||
) : lba_manager(lba_manager) {}
|
||||
|
||||
/**
|
||||
* alloc_new_extent_by_type
|
||||
*
|
||||
* Create a new extent, CachedExtent::poffset may not be set
|
||||
* if a delayed allocation is needed.
|
||||
*/
|
||||
CachedExtentRef alloc_new_extent_by_type(
|
||||
struct alloc_result_t {
|
||||
paddr_t paddr;
|
||||
bufferptr bp;
|
||||
};
|
||||
alloc_result_t alloc_new_extent(
|
||||
Transaction& t,
|
||||
extent_types_t type,
|
||||
seastore_off_t length,
|
||||
placement_hint_t hint) {
|
||||
// only logical extents should fall in this path
|
||||
assert(is_logical_type(type));
|
||||
placement_hint_t hint
|
||||
) {
|
||||
assert(hint < placement_hint_t::NUM_HINTS);
|
||||
auto dtype = get_allocator_type(hint);
|
||||
// FIXME: set delay for COLD extent when the record overhead is low
|
||||
bool delay = (hint > placement_hint_t::COLD &&
|
||||
can_delay_allocation(dtype));
|
||||
CachedExtentRef extent = cache.alloc_new_extent_by_type(
|
||||
t, type, length, delay);
|
||||
extent->hint = hint;
|
||||
return extent;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
std::enable_if_t<std::is_base_of_v<LogicalCachedExtent, T>, int> = 0>
|
||||
TCachedExtentRef<T> alloc_new_extent(
|
||||
Transaction& t,
|
||||
seastore_off_t length,
|
||||
placement_hint_t hint) {
|
||||
// only logical extents should fall in this path
|
||||
static_assert(is_logical_type(T::TYPE));
|
||||
assert(hint < placement_hint_t::NUM_HINTS);
|
||||
auto dtype = get_allocator_type(hint);
|
||||
// XXX: bp might be extended to point to different memory (e.g. PMem)
|
||||
// according to the allocator.
|
||||
auto bp = ceph::bufferptr(
|
||||
buffer::create_page_aligned(length));
|
||||
bp.zero();
|
||||
|
||||
if (!is_logical_type(type)) {
|
||||
// TODO: implement out-of-line strategy for physical extent.
|
||||
return {make_record_relative_paddr(0),
|
||||
std::move(bp)};
|
||||
}
|
||||
|
||||
// FIXME: set delay for COLD extent when the record overhead is low
|
||||
// NOTE: delay means to delay the decision about whether to write the
|
||||
// extent inline or out-of-line.
|
||||
bool delay = (hint > placement_hint_t::COLD &&
|
||||
can_delay_allocation(dtype));
|
||||
TCachedExtentRef<T> extent = cache.alloc_new_extent<T>(
|
||||
t, length, delay);
|
||||
extent->hint = hint;
|
||||
return extent;
|
||||
can_delay_allocation(get_allocator_type(hint)));
|
||||
if (delay) {
|
||||
return {make_delayed_temp_paddr(0),
|
||||
std::move(bp)};
|
||||
} else {
|
||||
return {make_record_relative_paddr(0),
|
||||
std::move(bp)};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -395,7 +388,6 @@ private:
|
||||
return devices[std::rand() % devices.size()];
|
||||
}
|
||||
|
||||
Cache& cache;
|
||||
LBAManager& lba_manager;
|
||||
std::map<device_type_t, std::vector<ExtentAllocatorRef>> allocators;
|
||||
};
|
||||
|
@ -1418,7 +1418,8 @@ seastar::future<std::unique_ptr<SeaStore>> make_seastore(
|
||||
auto cache = std::make_unique<Cache>(scanner_ref);
|
||||
auto lba_manager = lba_manager::create_lba_manager(*sm, *cache);
|
||||
|
||||
auto epm = std::make_unique<ExtentPlacementManager>(*cache, *lba_manager);
|
||||
auto epm = std::make_unique<ExtentPlacementManager>(*lba_manager);
|
||||
cache->set_epm(*epm);
|
||||
|
||||
journal->set_segment_provider(&*segment_cleaner);
|
||||
|
||||
|
@ -116,15 +116,16 @@ public:
|
||||
}
|
||||
|
||||
void add_fresh_extent(
|
||||
CachedExtentRef ref,
|
||||
bool delayed = false) {
|
||||
CachedExtentRef ref) {
|
||||
ceph_assert(!is_weak());
|
||||
if (delayed) {
|
||||
if (ref->get_paddr().is_delayed()) {
|
||||
assert(ref->get_paddr() == make_delayed_temp_paddr(0));
|
||||
assert(ref->is_logical());
|
||||
ref->set_paddr(make_delayed_temp_paddr(delayed_temp_offset));
|
||||
delayed_temp_offset += ref->get_length();
|
||||
delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>());
|
||||
} else {
|
||||
assert(ref->get_paddr() == make_record_relative_paddr(0));
|
||||
ref->set_paddr(make_record_relative_paddr(offset));
|
||||
offset += ref->get_length();
|
||||
inline_block_list.push_back(ref);
|
||||
|
@ -381,7 +381,7 @@ TransactionManager::rewrite_logical_extent(
|
||||
|
||||
auto lextent = extent->cast<LogicalCachedExtent>();
|
||||
cache->retire_extent(t, extent);
|
||||
auto nlextent = epm->alloc_new_extent_by_type(
|
||||
auto nlextent = cache->alloc_new_extent_by_type(
|
||||
t,
|
||||
lextent->get_type(),
|
||||
lextent->get_length(),
|
||||
|
@ -304,7 +304,7 @@ public:
|
||||
LOG_PREFIX(TransactionManager::alloc_extent);
|
||||
SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}",
|
||||
t, T::TYPE, len, placement_hint, laddr_hint);
|
||||
auto ext = epm->alloc_new_extent<T>(
|
||||
auto ext = cache->alloc_new_extent<T>(
|
||||
t,
|
||||
len,
|
||||
placement_hint);
|
||||
@ -551,8 +551,7 @@ public:
|
||||
*segment_cleaner,
|
||||
*sm,
|
||||
*lba_manager,
|
||||
*journal,
|
||||
*cache));
|
||||
*journal));
|
||||
}
|
||||
|
||||
~TransactionManager();
|
||||
|
Loading…
Reference in New Issue
Block a user