mirror of
https://github.com/ceph/ceph
synced 2025-01-01 16:42:29 +00:00
crimson/os/seastore: implement generational GC
Place extents into the dedicated RecordSubmitter by their data-category and reclaimed-count. Segments of different data-category or reclaimed-count should have different locality in the access patterns, which is the foundation to form a desired bimodal distribution of segment utilizations, so that GC can be more efficient. Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
This commit is contained in:
parent
12aade9b2c
commit
6b662cbd21
@ -13,13 +13,18 @@ SET_SUBSYS(seastore_cleaner);
|
||||
namespace crimson::os::seastore {
|
||||
|
||||
void segment_info_t::set_open(
|
||||
segment_seq_t _seq, segment_type_t _type)
|
||||
segment_seq_t _seq, segment_type_t _type,
|
||||
data_category_t _category, reclaim_gen_t _generation)
|
||||
{
|
||||
ceph_assert(_seq != NULL_SEG_SEQ);
|
||||
ceph_assert(_type != segment_type_t::NULL_SEG);
|
||||
ceph_assert(_category != data_category_t::NUM);
|
||||
ceph_assert(_generation < RECLAIM_GENERATIONS);
|
||||
state = Segment::segment_state_t::OPEN;
|
||||
seq = _seq;
|
||||
type = _type;
|
||||
category = _category;
|
||||
generation = _generation;
|
||||
written_to = 0;
|
||||
}
|
||||
|
||||
@ -28,6 +33,8 @@ void segment_info_t::set_empty()
|
||||
state = Segment::segment_state_t::EMPTY;
|
||||
seq = NULL_SEG_SEQ;
|
||||
type = segment_type_t::NULL_SEG;
|
||||
category = data_category_t::NUM;
|
||||
generation = NULL_GENERATION;
|
||||
last_modified = {};
|
||||
last_rewritten = {};
|
||||
written_to = 0;
|
||||
@ -40,13 +47,19 @@ void segment_info_t::set_closed()
|
||||
}
|
||||
|
||||
void segment_info_t::init_closed(
|
||||
segment_seq_t _seq, segment_type_t _type, std::size_t seg_size)
|
||||
segment_seq_t _seq, segment_type_t _type,
|
||||
data_category_t _category, reclaim_gen_t _generation,
|
||||
std::size_t seg_size)
|
||||
{
|
||||
ceph_assert(_seq != NULL_SEG_SEQ);
|
||||
ceph_assert(_type != segment_type_t::NULL_SEG);
|
||||
ceph_assert(_category != data_category_t::NUM);
|
||||
ceph_assert(_generation < RECLAIM_GENERATIONS);
|
||||
state = Segment::segment_state_t::CLOSED;
|
||||
seq = _seq;
|
||||
type = _type;
|
||||
category = _category;
|
||||
generation = _generation;
|
||||
written_to = seg_size;
|
||||
}
|
||||
|
||||
@ -59,6 +72,8 @@ std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
|
||||
} else { // open or closed
|
||||
out << ", seq=" << segment_seq_printer_t{info.seq}
|
||||
<< ", type=" << info.type
|
||||
<< ", category=" << info.category
|
||||
<< ", generation=" << reclaim_gen_printer_t{info.generation}
|
||||
<< ", last_modified=" << info.last_modified.time_since_epoch()
|
||||
<< ", last_rewritten=" << info.last_rewritten.time_since_epoch()
|
||||
<< ", written_to=" << info.written_to;
|
||||
@ -124,15 +139,19 @@ void segments_info_t::add_segment_manager(
|
||||
}
|
||||
|
||||
void segments_info_t::init_closed(
|
||||
segment_id_t segment, segment_seq_t seq, segment_type_t type)
|
||||
segment_id_t segment, segment_seq_t seq, segment_type_t type,
|
||||
data_category_t category, reclaim_gen_t generation)
|
||||
{
|
||||
LOG_PREFIX(segments_info_t::init_closed);
|
||||
auto& segment_info = segments[segment];
|
||||
INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
|
||||
INFO("initiating {} {} {} {} {}, {}, "
|
||||
"num_segments(empty={}, opened={}, closed={})",
|
||||
segment, segment_seq_printer_t{seq}, type,
|
||||
category, reclaim_gen_printer_t{generation},
|
||||
segment_info, num_empty, num_open, num_closed);
|
||||
ceph_assert(segment_info.is_empty());
|
||||
segment_info.init_closed(seq, type, get_segment_size());
|
||||
segment_info.init_closed(
|
||||
seq, type, category, generation, get_segment_size());
|
||||
ceph_assert(num_empty > 0);
|
||||
--num_empty;
|
||||
++num_closed;
|
||||
@ -147,15 +166,18 @@ void segments_info_t::init_closed(
|
||||
}
|
||||
|
||||
void segments_info_t::mark_open(
|
||||
segment_id_t segment, segment_seq_t seq, segment_type_t type)
|
||||
segment_id_t segment, segment_seq_t seq, segment_type_t type,
|
||||
data_category_t category, reclaim_gen_t generation)
|
||||
{
|
||||
LOG_PREFIX(segments_info_t::mark_open);
|
||||
auto& segment_info = segments[segment];
|
||||
INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
|
||||
INFO("opening {} {} {} {} {}, {}, "
|
||||
"num_segments(empty={}, opened={}, closed={})",
|
||||
segment, segment_seq_printer_t{seq}, type,
|
||||
category, reclaim_gen_printer_t{generation},
|
||||
segment_info, num_empty, num_open, num_closed);
|
||||
ceph_assert(segment_info.is_empty());
|
||||
segment_info.set_open(seq, type);
|
||||
segment_info.set_open(seq, type, category, generation);
|
||||
ceph_assert(num_empty > 0);
|
||||
--num_empty;
|
||||
++num_open;
|
||||
@ -531,7 +553,9 @@ void AsyncCleaner::register_metrics()
|
||||
|
||||
segment_id_t AsyncCleaner::allocate_segment(
|
||||
segment_seq_t seq,
|
||||
segment_type_t type)
|
||||
segment_type_t type,
|
||||
data_category_t category,
|
||||
reclaim_gen_t generation)
|
||||
{
|
||||
LOG_PREFIX(AsyncCleaner::allocate_segment);
|
||||
assert(seq != NULL_SEG_SEQ);
|
||||
@ -542,7 +566,7 @@ segment_id_t AsyncCleaner::allocate_segment(
|
||||
auto& segment_info = it->second;
|
||||
if (segment_info.is_empty()) {
|
||||
auto old_usage = calc_utilization(seg_id);
|
||||
segments.mark_open(seg_id, seq, type);
|
||||
segments.mark_open(seg_id, seq, type, category, generation);
|
||||
auto new_usage = calc_utilization(seg_id);
|
||||
adjust_segment_util(old_usage, new_usage);
|
||||
INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
|
||||
@ -682,7 +706,7 @@ AsyncCleaner::rewrite_dirty_ret AsyncCleaner::rewrite_dirty(
|
||||
dirty_list,
|
||||
[this, FNAME, &t](auto &e) {
|
||||
DEBUGT("cleaning {}", t, *e);
|
||||
return ecb->rewrite_extent(t, e);
|
||||
return ecb->rewrite_extent(t, e, DIRTY_GENERATION);
|
||||
});
|
||||
});
|
||||
});
|
||||
@ -867,11 +891,12 @@ AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
|
||||
INFO("reclaim {} {} start", seg_id, segment_info);
|
||||
ceph_assert(segment_info.is_closed());
|
||||
reclaim_state = reclaim_state_t::create(
|
||||
seg_id, segments.get_segment_size());
|
||||
seg_id, segment_info.generation, segments.get_segment_size());
|
||||
}
|
||||
reclaim_state->advance(config.reclaim_bytes_per_cycle);
|
||||
|
||||
DEBUG("reclaiming {}~{}",
|
||||
DEBUG("reclaiming {} {}~{}",
|
||||
reclaim_gen_printer_t{reclaim_state->generation},
|
||||
reclaim_state->start_pos,
|
||||
reclaim_state->end_pos);
|
||||
double pavail_ratio = get_projected_available_ratio();
|
||||
@ -965,7 +990,7 @@ AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
|
||||
extents,
|
||||
[this, &t, &reclaimed](auto &ext) {
|
||||
reclaimed += ext->get_length();
|
||||
return ecb->rewrite_extent(t, ext);
|
||||
return ecb->rewrite_extent(t, ext, reclaim_state->target_generation);
|
||||
});
|
||||
});
|
||||
}).si_then([this, &t, &seq] {
|
||||
@ -1074,7 +1099,9 @@ AsyncCleaner::mount_ret AsyncCleaner::mount()
|
||||
init_mark_segment_closed(
|
||||
segment_id,
|
||||
header.segment_seq,
|
||||
header.type);
|
||||
header.type,
|
||||
header.category,
|
||||
header.generation);
|
||||
return seastar::now();
|
||||
}).handle_error(
|
||||
crimson::ct_error::enodata::handle(
|
||||
@ -1179,7 +1206,9 @@ AsyncCleaner::scan_extents_ret AsyncCleaner::scan_nonfull_segment(
|
||||
init_mark_segment_closed(
|
||||
segment_id,
|
||||
header.segment_seq,
|
||||
header.type);
|
||||
header.type,
|
||||
header.category,
|
||||
header.generation);
|
||||
return seastar::now();
|
||||
});
|
||||
}
|
||||
|
@ -37,6 +37,10 @@ struct segment_info_t {
|
||||
|
||||
segment_type_t type = segment_type_t::NULL_SEG;
|
||||
|
||||
data_category_t category = data_category_t::NUM;
|
||||
|
||||
reclaim_gen_t generation = NULL_GENERATION;
|
||||
|
||||
time_point last_modified;
|
||||
time_point last_rewritten;
|
||||
|
||||
@ -59,9 +63,12 @@ struct segment_info_t {
|
||||
return state == Segment::segment_state_t::OPEN;
|
||||
}
|
||||
|
||||
void init_closed(segment_seq_t, segment_type_t, std::size_t);
|
||||
void init_closed(segment_seq_t, segment_type_t,
|
||||
data_category_t, reclaim_gen_t,
|
||||
std::size_t);
|
||||
|
||||
void set_open(segment_seq_t, segment_type_t);
|
||||
void set_open(segment_seq_t, segment_type_t,
|
||||
data_category_t, reclaim_gen_t);
|
||||
|
||||
void set_empty();
|
||||
|
||||
@ -190,9 +197,11 @@ public:
|
||||
void add_segment_manager(SegmentManager &segment_manager);
|
||||
|
||||
// initialize non-empty segments, the others are by default empty
|
||||
void init_closed(segment_id_t, segment_seq_t, segment_type_t);
|
||||
void init_closed(segment_id_t, segment_seq_t, segment_type_t,
|
||||
data_category_t, reclaim_gen_t);
|
||||
|
||||
void mark_open(segment_id_t, segment_seq_t, segment_type_t);
|
||||
void mark_open(segment_id_t, segment_seq_t, segment_type_t,
|
||||
data_category_t, reclaim_gen_t);
|
||||
|
||||
void mark_empty(segment_id_t);
|
||||
|
||||
@ -241,7 +250,7 @@ public:
|
||||
virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
|
||||
|
||||
virtual segment_id_t allocate_segment(
|
||||
segment_seq_t seq, segment_type_t type) = 0;
|
||||
segment_seq_t, segment_type_t, data_category_t, reclaim_gen_t) = 0;
|
||||
|
||||
virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
|
||||
|
||||
@ -597,7 +606,8 @@ public:
|
||||
using rewrite_extent_ret = rewrite_extent_iertr::future<>;
|
||||
virtual rewrite_extent_ret rewrite_extent(
|
||||
Transaction &t,
|
||||
CachedExtentRef extent) = 0;
|
||||
CachedExtentRef extent,
|
||||
reclaim_gen_t target_generation) = 0;
|
||||
|
||||
/**
|
||||
* get_extent_if_live
|
||||
@ -739,7 +749,7 @@ public:
|
||||
}
|
||||
|
||||
segment_id_t allocate_segment(
|
||||
segment_seq_t seq, segment_type_t type) final;
|
||||
segment_seq_t, segment_type_t, data_category_t, reclaim_gen_t) final;
|
||||
|
||||
void close_segment(segment_id_t segment) final;
|
||||
|
||||
@ -935,14 +945,21 @@ private:
|
||||
}
|
||||
|
||||
struct reclaim_state_t {
|
||||
reclaim_gen_t generation;
|
||||
reclaim_gen_t target_generation;
|
||||
std::size_t segment_size;
|
||||
paddr_t start_pos;
|
||||
paddr_t end_pos;
|
||||
|
||||
static reclaim_state_t create(
|
||||
segment_id_t segment_id,
|
||||
reclaim_gen_t generation,
|
||||
std::size_t segment_size) {
|
||||
return {segment_size,
|
||||
ceph_assert(generation < RECLAIM_GENERATIONS);
|
||||
return {generation,
|
||||
(reclaim_gen_t)(generation == RECLAIM_GENERATIONS - 1 ?
|
||||
generation : generation + 1),
|
||||
segment_size,
|
||||
P_ADDR_NULL,
|
||||
paddr_t::make_seg_paddr(segment_id, 0)};
|
||||
}
|
||||
@ -1280,10 +1297,12 @@ private:
|
||||
void init_mark_segment_closed(
|
||||
segment_id_t segment,
|
||||
segment_seq_t seq,
|
||||
segment_type_t s_type) {
|
||||
segment_type_t s_type,
|
||||
data_category_t category,
|
||||
reclaim_gen_t generation) {
|
||||
ceph_assert(!init_complete);
|
||||
auto old_usage = calc_utilization(segment);
|
||||
segments.init_closed(segment, seq, s_type);
|
||||
segments.init_closed(segment, seq, s_type, category, generation);
|
||||
auto new_usage = calc_utilization(segment);
|
||||
adjust_segment_util(old_usage, new_usage);
|
||||
if (s_type == segment_type_t::OOL) {
|
||||
|
@ -313,7 +313,9 @@ public:
|
||||
static mkfs_ret mkfs(op_context_t<node_key_t> c) {
|
||||
auto root_leaf = c.cache.template alloc_new_extent<leaf_node_t>(
|
||||
c.trans,
|
||||
node_size);
|
||||
node_size,
|
||||
placement_hint_t::HOT,
|
||||
0);
|
||||
root_leaf->set_size(0);
|
||||
fixed_kv_node_meta_t<node_key_t> meta{min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, 1};
|
||||
root_leaf->set_meta(meta);
|
||||
@ -814,7 +816,9 @@ public:
|
||||
std::remove_reference_t<decltype(fixed_kv_extent)>
|
||||
>(
|
||||
c.trans,
|
||||
fixed_kv_extent.get_length());
|
||||
fixed_kv_extent.get_length(),
|
||||
fixed_kv_extent.get_user_hint(),
|
||||
fixed_kv_extent.get_reclaim_generation());
|
||||
fixed_kv_extent.get_bptr().copy_out(
|
||||
0,
|
||||
fixed_kv_extent.get_length(),
|
||||
@ -1400,7 +1404,7 @@ private:
|
||||
|
||||
if (split_from == iter.get_depth()) {
|
||||
auto nroot = c.cache.template alloc_new_extent<internal_node_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
fixed_kv_node_meta_t<node_key_t> meta{
|
||||
min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, iter.get_depth() + 1};
|
||||
nroot->set_meta(meta);
|
||||
|
@ -154,9 +154,9 @@ struct FixedKVInternalNode
|
||||
std::tuple<Ref, Ref, NODE_KEY>
|
||||
make_split_children(op_context_t<NODE_KEY> c) {
|
||||
auto left = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto right = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto pivot = this->split_into(*left, *right);
|
||||
left->pin.set_range(left->get_meta());
|
||||
right->pin.set_range(right->get_meta());
|
||||
@ -170,7 +170,7 @@ struct FixedKVInternalNode
|
||||
op_context_t<NODE_KEY> c,
|
||||
Ref &right) {
|
||||
auto replacement = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
replacement->merge_from(*this, *right->template cast<node_type_t>());
|
||||
replacement->pin.set_range(replacement->get_meta());
|
||||
return replacement;
|
||||
@ -184,9 +184,9 @@ struct FixedKVInternalNode
|
||||
ceph_assert(_right->get_type() == this->get_type());
|
||||
auto &right = *_right->template cast<node_type_t>();
|
||||
auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
|
||||
auto pivot = this->balance_into_new_nodes(
|
||||
*this,
|
||||
@ -355,9 +355,9 @@ struct FixedKVLeafNode
|
||||
std::tuple<Ref, Ref, NODE_KEY>
|
||||
make_split_children(op_context_t<NODE_KEY> c) {
|
||||
auto left = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto right = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto pivot = this->split_into(*left, *right);
|
||||
left->pin.set_range(left->get_meta());
|
||||
right->pin.set_range(right->get_meta());
|
||||
@ -371,7 +371,7 @@ struct FixedKVLeafNode
|
||||
op_context_t<NODE_KEY> c,
|
||||
Ref &right) {
|
||||
auto replacement = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
replacement->merge_from(*this, *right->template cast<node_type_t>());
|
||||
replacement->pin.set_range(replacement->get_meta());
|
||||
return replacement;
|
||||
@ -385,9 +385,9 @@ struct FixedKVLeafNode
|
||||
ceph_assert(_right->get_type() == this->get_type());
|
||||
auto &right = *_right->template cast<node_type_t>();
|
||||
auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
|
||||
c.trans, node_size);
|
||||
c.trans, node_size, placement_hint_t::HOT, 0);
|
||||
|
||||
auto pivot = this->balance_into_new_nodes(
|
||||
*this,
|
||||
|
@ -85,8 +85,10 @@ Cache::retire_extent_ret Cache::retire_extent_addr(
|
||||
// add a new placeholder to Cache
|
||||
ext = CachedExtent::make_cached_extent_ref<
|
||||
RetiredExtentPlaceholder>(length);
|
||||
ext->set_paddr(addr);
|
||||
ext->state = CachedExtent::extent_state_t::CLEAN;
|
||||
ext->init(CachedExtent::extent_state_t::CLEAN,
|
||||
addr,
|
||||
placement_hint_t::NUM_HINTS,
|
||||
NULL_GENERATION);
|
||||
DEBUGT("retire {}~{} as placeholder, add extent -- {}",
|
||||
t, addr, length, *ext);
|
||||
add_extent(ext);
|
||||
@ -924,40 +926,41 @@ void Cache::on_transaction_destruct(Transaction& t)
|
||||
}
|
||||
|
||||
CachedExtentRef Cache::alloc_new_extent_by_type(
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
seastore_off_t length, ///< [in] length
|
||||
placement_hint_t hint
|
||||
placement_hint_t hint, ///< [in] user hint
|
||||
reclaim_gen_t gen ///< [in] reclaim generation
|
||||
)
|
||||
{
|
||||
LOG_PREFIX(Cache::alloc_new_extent_by_type);
|
||||
SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}",
|
||||
t, type, length, hint);
|
||||
SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}, gen={}",
|
||||
t, type, length, hint, reclaim_gen_printer_t{gen});
|
||||
switch (type) {
|
||||
case extent_types_t::ROOT:
|
||||
ceph_assert(0 == "ROOT is never directly alloc'd");
|
||||
return CachedExtentRef();
|
||||
case extent_types_t::LADDR_INTERNAL:
|
||||
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint);
|
||||
return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint, gen);
|
||||
case extent_types_t::LADDR_LEAF:
|
||||
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint);
|
||||
return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint, gen);
|
||||
case extent_types_t::ONODE_BLOCK_STAGED:
|
||||
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint);
|
||||
return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint, gen);
|
||||
case extent_types_t::OMAP_INNER:
|
||||
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint);
|
||||
return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint, gen);
|
||||
case extent_types_t::OMAP_LEAF:
|
||||
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint);
|
||||
return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint, gen);
|
||||
case extent_types_t::COLL_BLOCK:
|
||||
return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint);
|
||||
return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint, gen);
|
||||
case extent_types_t::OBJECT_DATA_BLOCK:
|
||||
return alloc_new_extent<ObjectDataBlock>(t, length, hint);
|
||||
return alloc_new_extent<ObjectDataBlock>(t, length, hint, gen);
|
||||
case extent_types_t::RETIRED_PLACEHOLDER:
|
||||
ceph_assert(0 == "impossible");
|
||||
return CachedExtentRef();
|
||||
case extent_types_t::TEST_BLOCK:
|
||||
return alloc_new_extent<TestBlock>(t, length, hint);
|
||||
return alloc_new_extent<TestBlock>(t, length, hint, gen);
|
||||
case extent_types_t::TEST_BLOCK_PHYSICAL:
|
||||
return alloc_new_extent<TestBlockPhysical>(t, length, hint);
|
||||
return alloc_new_extent<TestBlockPhysical>(t, length, hint, gen);
|
||||
case extent_types_t::NONE: {
|
||||
ceph_assert(0 == "NONE is an invalid extent type");
|
||||
return CachedExtentRef();
|
||||
@ -986,6 +989,7 @@ CachedExtentRef Cache::duplicate_for_write(
|
||||
|
||||
ret->version++;
|
||||
ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
|
||||
ret->set_reclaim_generation(DIRTY_GENERATION);
|
||||
DEBUGT("{} -> {}", t, *i, *ret);
|
||||
return ret;
|
||||
}
|
||||
|
@ -303,8 +303,10 @@ public:
|
||||
if (!cached) {
|
||||
auto ret = CachedExtent::make_cached_extent_ref<T>(
|
||||
alloc_cache_buf(length));
|
||||
ret->set_paddr(offset);
|
||||
ret->state = CachedExtent::extent_state_t::CLEAN_PENDING;
|
||||
ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
|
||||
offset,
|
||||
placement_hint_t::NUM_HINTS,
|
||||
NULL_GENERATION);
|
||||
SUBDEBUG(seastore_cache,
|
||||
"{} {}~{} is absent, add extent and reading ... -- {}",
|
||||
T::TYPE, offset, length, *ret);
|
||||
@ -319,8 +321,10 @@ public:
|
||||
if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
|
||||
auto ret = CachedExtent::make_cached_extent_ref<T>(
|
||||
alloc_cache_buf(length));
|
||||
ret->set_paddr(offset);
|
||||
ret->state = CachedExtent::extent_state_t::CLEAN_PENDING;
|
||||
ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
|
||||
offset,
|
||||
placement_hint_t::NUM_HINTS,
|
||||
NULL_GENERATION);
|
||||
SUBDEBUG(seastore_cache,
|
||||
"{} {}~{} is absent(placeholder), reading ... -- {}",
|
||||
T::TYPE, offset, length, *ret);
|
||||
@ -681,19 +685,23 @@ public:
|
||||
TCachedExtentRef<T> alloc_new_extent(
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
seastore_off_t length, ///< [in] length
|
||||
placement_hint_t hint = placement_hint_t::HOT
|
||||
placement_hint_t hint, ///< [in] user hint
|
||||
reclaim_gen_t gen ///< [in] reclaim generation
|
||||
) {
|
||||
LOG_PREFIX(Cache::alloc_new_extent);
|
||||
SUBTRACET(seastore_cache, "allocate {} {}B, hint={}",
|
||||
t, T::TYPE, length, hint);
|
||||
auto result = epm.alloc_new_extent(t, T::TYPE, length, hint);
|
||||
SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}",
|
||||
t, T::TYPE, length, hint, reclaim_gen_printer_t{gen});
|
||||
auto result = epm.alloc_new_extent(t, T::TYPE, length, hint, gen);
|
||||
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
|
||||
ret->set_paddr(result.paddr);
|
||||
ret->hint = hint;
|
||||
ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING;
|
||||
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
|
||||
result.paddr,
|
||||
hint,
|
||||
result.gen);
|
||||
t.add_fresh_extent(ret);
|
||||
SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}",
|
||||
t, T::TYPE, length, result.paddr, hint, *ret);
|
||||
SUBDEBUGT(seastore_cache,
|
||||
"allocated {} {}B extent at {}, hint={}, gen={} -- {}",
|
||||
t, T::TYPE, length, result.paddr,
|
||||
hint, reclaim_gen_printer_t{result.gen}, *ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -703,10 +711,11 @@ public:
|
||||
* Allocates a fresh extent. addr will be relative until commit.
|
||||
*/
|
||||
CachedExtentRef alloc_new_extent_by_type(
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
Transaction &t, ///< [in, out] current transaction
|
||||
extent_types_t type, ///< [in] type tag
|
||||
seastore_off_t length, ///< [in] length
|
||||
placement_hint_t hint = placement_hint_t::HOT
|
||||
placement_hint_t hint, ///< [in] user hint
|
||||
reclaim_gen_t gen ///< [in] reclaim generation
|
||||
);
|
||||
|
||||
/**
|
||||
|
@ -105,7 +105,17 @@ class CachedExtent : public boost::intrusive_ref_counter<
|
||||
|
||||
// time of the last rewrite
|
||||
seastar::lowres_system_clock::time_point last_rewritten;
|
||||
|
||||
public:
|
||||
/// Set the extent's state, physical address, user placement hint and
/// reclaim generation in a single call.
///
/// No range checking is done on hint or gen here; callers in this change
/// pass placement_hint_t::NUM_HINTS and NULL_GENERATION for extents that
/// are read from disk or created as retired placeholders.
void init(extent_state_t _state,
|
||||
paddr_t paddr,
|
||||
placement_hint_t hint,
|
||||
reclaim_gen_t gen) {
|
||||
state = _state;
|
||||
set_paddr(paddr);
|
||||
user_hint = hint;
|
||||
reclaim_generation = gen;
|
||||
}
|
||||
|
||||
void set_last_modified(seastar::lowres_system_clock::duration d) {
|
||||
last_modified = seastar::lowres_system_clock::time_point(d);
|
||||
@ -209,7 +219,9 @@ public:
|
||||
<< ", length=" << get_length()
|
||||
<< ", state=" << state
|
||||
<< ", last_committed_crc=" << last_committed_crc
|
||||
<< ", refcount=" << use_count();
|
||||
<< ", refcount=" << use_count()
|
||||
<< ", user_hint=" << user_hint
|
||||
<< ", reclaim_gen=" << reclaim_generation;
|
||||
if (state != extent_state_t::INVALID &&
|
||||
state != extent_state_t::CLEAN_PENDING) {
|
||||
print_detail(out);
|
||||
@ -374,8 +386,24 @@ public:
|
||||
|
||||
virtual ~CachedExtent();
|
||||
|
||||
/// hint for allocators
|
||||
placement_hint_t hint = placement_hint_t::NUM_HINTS;
|
||||
/// Return the placement hint currently stored on this extent
/// (placement_hint_t::NUM_HINTS after invalidate_hints()).
placement_hint_t get_user_hint() const {
|
||||
return user_hint;
|
||||
}
|
||||
|
||||
/// Return the reclaim generation currently stored on this extent
/// (NULL_GENERATION after invalidate_hints()).
reclaim_gen_t get_reclaim_generation() const {
|
||||
return reclaim_generation;
|
||||
}
|
||||
|
||||
/// Reset both placement inputs to their "unset" sentinels:
/// user_hint becomes NUM_HINTS and reclaim_generation becomes
/// NULL_GENERATION.
void invalidate_hints() {
|
||||
user_hint = placement_hint_t::NUM_HINTS;
|
||||
reclaim_generation = NULL_GENERATION;
|
||||
}
|
||||
|
||||
/// Assign a reclaim generation to this extent.
///
/// gen must be a real generation (asserted < RECLAIM_GENERATIONS, so
/// NULL_GENERATION is not accepted here). As a side effect the user hint
/// is overwritten with placement_hint_t::REWRITE, whatever it was before.
void set_reclaim_generation(reclaim_gen_t gen) {
|
||||
assert(gen < RECLAIM_GENERATIONS);
|
||||
user_hint = placement_hint_t::REWRITE;
|
||||
reclaim_generation = gen;
|
||||
}
|
||||
|
||||
bool is_inline() const {
|
||||
return poffset.is_relative();
|
||||
@ -454,6 +482,11 @@ private:
|
||||
|
||||
read_set_item_t<Transaction>::list transactions;
|
||||
|
||||
placement_hint_t user_hint;
|
||||
|
||||
/// > 0 and not null means the extent is being reclaimed
|
||||
reclaim_gen_t reclaim_generation;
|
||||
|
||||
protected:
|
||||
CachedExtent(CachedExtent &&other) = delete;
|
||||
CachedExtent(ceph::bufferptr &&ptr) : ptr(std::move(ptr)) {}
|
||||
|
@ -10,10 +10,11 @@ SET_SUBSYS(seastore_journal);
|
||||
namespace crimson::os::seastore {
|
||||
|
||||
SegmentedOolWriter::SegmentedOolWriter(
|
||||
std::string name,
|
||||
data_category_t category,
|
||||
reclaim_gen_t gen,
|
||||
SegmentProvider& sp,
|
||||
SegmentSeqAllocator &ssa)
|
||||
: segment_allocator(name, segment_type_t::OOL, sp, ssa),
|
||||
: segment_allocator(segment_type_t::OOL, category, gen, sp, ssa),
|
||||
record_submitter(crimson::common::get_conf<uint64_t>(
|
||||
"seastore_journal_iodepth_limit"),
|
||||
crimson::common::get_conf<uint64_t>(
|
||||
@ -55,7 +56,7 @@ SegmentedOolWriter::write_record(
|
||||
TRACET("{} ool extent written at {} -- {}",
|
||||
t, segment_allocator.get_name(),
|
||||
extent_addr, *extent);
|
||||
extent->hint = placement_hint_t::NUM_HINTS; // invalidate hint
|
||||
extent->invalidate_hints();
|
||||
t.mark_delayed_extent_ool(extent, extent_addr);
|
||||
extent_addr = extent_addr.as_seg_paddr().add_offset(
|
||||
extent->get_length());
|
||||
|
@ -48,7 +48,8 @@ class SegmentProvider;
|
||||
*/
|
||||
class SegmentedOolWriter : public ExtentOolWriter {
|
||||
public:
|
||||
SegmentedOolWriter(std::string name,
|
||||
SegmentedOolWriter(data_category_t category,
|
||||
reclaim_gen_t gen,
|
||||
SegmentProvider &sp,
|
||||
SegmentSeqAllocator &ssa);
|
||||
|
||||
@ -85,26 +86,29 @@ private:
|
||||
|
||||
class ExtentPlacementManager {
|
||||
public:
|
||||
ExtentPlacementManager() {
|
||||
ExtentPlacementManager(bool prefer_ool)
|
||||
: prefer_ool{prefer_ool} {
|
||||
devices_by_id.resize(DEVICE_ID_GLOBAL_MAX, nullptr);
|
||||
}
|
||||
|
||||
void init_ool_writers(SegmentProvider &sp, SegmentSeqAllocator &ssa) {
|
||||
// Currently only one SegmentProvider is supported, so hardcode the
|
||||
// writers_by_hint for now.
|
||||
writer_seed = 0;
|
||||
// Currently only one SegmentProvider is supported
|
||||
writer_refs.clear();
|
||||
writers_by_hint.resize((std::size_t)placement_hint_t::NUM_HINTS, {});
|
||||
|
||||
// ool writer is not supported for placement_hint_t::HOT
|
||||
writer_refs.emplace_back(
|
||||
std::make_unique<SegmentedOolWriter>("COLD", sp, ssa));
|
||||
writers_by_hint[(std::size_t)placement_hint_t::COLD
|
||||
].emplace_back(writer_refs.back().get());
|
||||
writer_refs.emplace_back(
|
||||
std::make_unique<SegmentedOolWriter>("REWRITE", sp, ssa));
|
||||
writers_by_hint[(std::size_t)placement_hint_t::REWRITE
|
||||
].emplace_back(writer_refs.back().get());
|
||||
ceph_assert(RECLAIM_GENERATIONS > 0);
|
||||
data_writers_by_gen.resize(RECLAIM_GENERATIONS, {});
|
||||
for (reclaim_gen_t gen = 0; gen < RECLAIM_GENERATIONS; ++gen) {
|
||||
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
|
||||
data_category_t::DATA, gen, sp, ssa));
|
||||
data_writers_by_gen[gen] = writer_refs.back().get();
|
||||
}
|
||||
|
||||
md_writers_by_gen.resize(RECLAIM_GENERATIONS - 1, {});
|
||||
for (reclaim_gen_t gen = 1; gen < RECLAIM_GENERATIONS; ++gen) {
|
||||
writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
|
||||
data_category_t::METADATA, gen, sp, ssa));
|
||||
md_writers_by_gen[gen - 1] = writer_refs.back().get();
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(Device* device, bool is_primary) {
|
||||
@ -132,8 +136,10 @@ public:
|
||||
open_ertr::future<> open() {
|
||||
LOG_PREFIX(ExtentPlacementManager::open);
|
||||
SUBINFO(seastore_journal, "started");
|
||||
return crimson::do_for_each(writers_by_hint, [](auto& writers) {
|
||||
return crimson::do_for_each(writers, [](auto& writer) {
|
||||
return crimson::do_for_each(data_writers_by_gen, [](auto &writer) {
|
||||
return writer->open();
|
||||
}).safe_then([this] {
|
||||
return crimson::do_for_each(md_writers_by_gen, [](auto &writer) {
|
||||
return writer->open();
|
||||
});
|
||||
});
|
||||
@ -142,14 +148,18 @@ public:
|
||||
/// Result of alloc_new_extent(): where the extent was placed, its buffer,
/// and the reclaim generation chosen for it.
struct alloc_result_t {
|
||||
// temporary address: record-relative or delayed, as chosen by alloc_new_extent()
paddr_t paddr;
|
||||
// buffer backing the newly allocated extent
bufferptr bp;
|
||||
// generation selected by alloc_new_extent(); it can differ from the
// requested one (e.g. COLD_GENERATION is returned for COLD hints)
reclaim_gen_t gen;
|
||||
};
|
||||
alloc_result_t alloc_new_extent(
|
||||
Transaction& t,
|
||||
extent_types_t type,
|
||||
seastore_off_t length,
|
||||
placement_hint_t hint
|
||||
placement_hint_t hint,
|
||||
reclaim_gen_t gen
|
||||
) {
|
||||
assert(hint < placement_hint_t::NUM_HINTS);
|
||||
assert(gen < RECLAIM_GENERATIONS);
|
||||
assert(gen == 0 || hint == placement_hint_t::REWRITE);
|
||||
|
||||
// XXX: bp might be extended to point to different memory (e.g. PMem)
|
||||
// according to the allocator.
|
||||
@ -160,19 +170,35 @@ public:
|
||||
if (!is_logical_type(type)) {
|
||||
// TODO: implement out-of-line strategy for physical extent.
|
||||
return {make_record_relative_paddr(0),
|
||||
std::move(bp)};
|
||||
std::move(bp),
|
||||
0};
|
||||
}
|
||||
|
||||
// FIXME: set delay for COLD extent and improve GC
|
||||
// NOTE: delay means to delay the decision about whether to write the
|
||||
// extent as inline or out-of-line extents.
|
||||
bool delay = (hint > placement_hint_t::COLD);
|
||||
if (delay) {
|
||||
if (hint == placement_hint_t::COLD) {
|
||||
assert(gen == 0);
|
||||
return {make_delayed_temp_paddr(0),
|
||||
std::move(bp)};
|
||||
std::move(bp),
|
||||
COLD_GENERATION};
|
||||
}
|
||||
|
||||
if (get_extent_category(type) == data_category_t::METADATA &&
|
||||
gen == 0) {
|
||||
// gen 0 METADATA writer is the journal writer
|
||||
if (prefer_ool) {
|
||||
return {make_delayed_temp_paddr(0),
|
||||
std::move(bp),
|
||||
1};
|
||||
} else {
|
||||
return {make_record_relative_paddr(0),
|
||||
std::move(bp),
|
||||
0};
|
||||
}
|
||||
} else {
|
||||
return {make_record_relative_paddr(0),
|
||||
std::move(bp)};
|
||||
assert(get_extent_category(type) == data_category_t::DATA ||
|
||||
gen > 0);
|
||||
return {make_delayed_temp_paddr(0),
|
||||
std::move(bp),
|
||||
gen};
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,7 +219,10 @@ public:
|
||||
[this, &t, &delayed_extents](auto& alloc_map) {
|
||||
for (auto& extent : delayed_extents) {
|
||||
// For now, just do ool allocation for any delayed extent
|
||||
auto writer_ptr = get_writer(extent->hint);
|
||||
auto writer_ptr = get_writer(
|
||||
extent->get_user_hint(),
|
||||
get_extent_category(extent->get_type()),
|
||||
extent->get_reclaim_generation());
|
||||
alloc_map[writer_ptr].emplace_back(extent);
|
||||
}
|
||||
return trans_intr::do_for_each(alloc_map, [&t](auto& p) {
|
||||
@ -208,8 +237,10 @@ public:
|
||||
close_ertr::future<> close() {
|
||||
LOG_PREFIX(ExtentPlacementManager::close);
|
||||
SUBINFO(seastore_journal, "started");
|
||||
return crimson::do_for_each(writers_by_hint, [](auto& writers) {
|
||||
return crimson::do_for_each(writers, [](auto& writer) {
|
||||
return crimson::do_for_each(data_writers_by_gen, [](auto &writer) {
|
||||
return writer->close();
|
||||
}).safe_then([this] {
|
||||
return crimson::do_for_each(md_writers_by_gen, [](auto &writer) {
|
||||
return writer->close();
|
||||
});
|
||||
}).safe_then([this] {
|
||||
@ -230,18 +261,27 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
ExtentOolWriter* get_writer(placement_hint_t hint) {
|
||||
ExtentOolWriter* get_writer(placement_hint_t hint,
|
||||
data_category_t category,
|
||||
reclaim_gen_t gen) {
|
||||
assert(hint < placement_hint_t::NUM_HINTS);
|
||||
auto hint_index = static_cast<std::size_t>(hint);
|
||||
assert(hint_index < writers_by_hint.size());
|
||||
auto& writers = writers_by_hint[hint_index];
|
||||
assert(writers.size() > 0);
|
||||
return writers[writer_seed++ % writers.size()];
|
||||
assert(gen < RECLAIM_GENERATIONS);
|
||||
if (category == data_category_t::DATA) {
|
||||
return data_writers_by_gen[gen];
|
||||
} else {
|
||||
assert(category == data_category_t::METADATA);
|
||||
// gen 0 METADATA writer is the journal writer
|
||||
assert(gen > 0);
|
||||
return md_writers_by_gen[gen - 1];
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t writer_seed = 0;
|
||||
bool prefer_ool;
|
||||
std::vector<ExtentOolWriterRef> writer_refs;
|
||||
std::vector<std::vector<ExtentOolWriter*>> writers_by_hint;
|
||||
std::vector<ExtentOolWriter*> data_writers_by_gen;
|
||||
// gen 0 METADATA writer is the journal writer
|
||||
std::vector<ExtentOolWriter*> md_writers_by_gen;
|
||||
|
||||
std::vector<Device*> devices_by_id;
|
||||
Device* primary_device = nullptr;
|
||||
};
|
||||
|
@ -13,13 +13,15 @@ SET_SUBSYS(seastore_journal);
|
||||
namespace crimson::os::seastore::journal {
|
||||
|
||||
SegmentAllocator::SegmentAllocator(
|
||||
std::string name,
|
||||
segment_type_t type,
|
||||
data_category_t category,
|
||||
reclaim_gen_t gen,
|
||||
SegmentProvider &sp,
|
||||
SegmentSeqAllocator &ssa)
|
||||
: name{name},
|
||||
print_name{fmt::format("D?_{}", name)},
|
||||
: print_name{fmt::format("{}_G{}", category, gen)},
|
||||
type{type},
|
||||
category{category},
|
||||
gen{gen},
|
||||
segment_provider{sp},
|
||||
sm_group{*sp.get_segment_manager_group()},
|
||||
segment_seq_allocator(ssa)
|
||||
@ -40,7 +42,8 @@ SegmentAllocator::do_open()
|
||||
new_segment_seq,
|
||||
reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
|
||||
sizeof(meta.seastore_id.uuid));
|
||||
auto new_segment_id = segment_provider.allocate_segment(new_segment_seq, type);
|
||||
auto new_segment_id = segment_provider.allocate_segment(
|
||||
new_segment_seq, type, category, gen);
|
||||
ceph_assert(new_segment_id != NULL_SEG_ID);
|
||||
return sm_group.open(new_segment_id
|
||||
).handle_error(
|
||||
@ -66,7 +69,9 @@ SegmentAllocator::do_open()
|
||||
new_journal_tail,
|
||||
new_alloc_replay_from,
|
||||
current_segment_nonce,
|
||||
type};
|
||||
type,
|
||||
category,
|
||||
gen};
|
||||
INFO("{} writing header to new segment ... -- {}",
|
||||
print_name, header);
|
||||
|
||||
@ -124,7 +129,8 @@ SegmentAllocator::open()
|
||||
for (auto& device_id : device_ids) {
|
||||
oss << "_" << device_id_printer_t{device_id};
|
||||
}
|
||||
oss << "_" << name;
|
||||
oss << "_"
|
||||
<< fmt::format("{}_G{}", category, gen);
|
||||
print_name = oss.str();
|
||||
|
||||
INFO("{}", print_name);
|
||||
|
@ -30,8 +30,9 @@ class SegmentAllocator {
|
||||
crimson::ct_error::input_output_error>;
|
||||
|
||||
public:
|
||||
SegmentAllocator(std::string name,
|
||||
segment_type_t type,
|
||||
SegmentAllocator(segment_type_t type,
|
||||
data_category_t category,
|
||||
reclaim_gen_t gen,
|
||||
SegmentProvider &sp,
|
||||
SegmentSeqAllocator &ssa);
|
||||
|
||||
@ -111,11 +112,12 @@ class SegmentAllocator {
|
||||
using close_segment_ertr = base_ertr;
|
||||
close_segment_ertr::future<> close_segment();
|
||||
|
||||
const std::string name;
|
||||
// device id is not available during construction,
|
||||
// so generate the print_name later.
|
||||
std::string print_name;
|
||||
const segment_type_t type; // JOURNAL or OOL
|
||||
const data_category_t category;
|
||||
const reclaim_gen_t gen;
|
||||
SegmentProvider &segment_provider;
|
||||
SegmentManagerGroup &sm_group;
|
||||
SegmentRef current_segment;
|
||||
|
@ -31,8 +31,9 @@ SegmentedJournal::SegmentedJournal(
|
||||
: segment_provider(segment_provider),
|
||||
segment_seq_allocator(
|
||||
new SegmentSeqAllocator(segment_type_t::JOURNAL)),
|
||||
journal_segment_allocator("JOURNAL",
|
||||
segment_type_t::JOURNAL,
|
||||
journal_segment_allocator(segment_type_t::JOURNAL,
|
||||
data_category_t::METADATA,
|
||||
0, // generation
|
||||
segment_provider,
|
||||
*segment_seq_allocator),
|
||||
record_submitter(crimson::common::get_conf<uint64_t>(
|
||||
|
@ -173,6 +173,29 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &out, reclaim_gen_printer_t gen)
|
||||
{
|
||||
if (gen.gen == NULL_GENERATION) {
|
||||
return out << "NULL_GEN";
|
||||
} else if (gen.gen >= RECLAIM_GENERATIONS) {
|
||||
return out << "INVALID_GEN(" << (unsigned)gen.gen << ")";
|
||||
} else {
|
||||
return out << "GEN(" << (unsigned)gen.gen << ")";
|
||||
}
|
||||
}
|
||||
|
||||
/// Streams the short label of a data category.
///
/// METADATA prints as "MD" and DATA as "DATA"; every other value (including
/// the NUM marker) prints as "INVALID_CATEGORY!".
///
/// @param out  destination stream
/// @param c    category to print
/// @return     `out`, to allow chaining
std::ostream &operator<<(std::ostream &out, data_category_t c)
{
  if (c == data_category_t::METADATA) {
    return out << "MD";
  }
  if (c == data_category_t::DATA) {
    return out << "DATA";
  }
  // Anything else is not a real category.
  return out << "INVALID_CATEGORY!";
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs)
|
||||
{
|
||||
bool first = false;
|
||||
@ -224,6 +247,8 @@ std::ostream &operator<<(std::ostream &out, const segment_header_t &header)
|
||||
<< ", journal_tail=" << header.journal_tail
|
||||
<< ", segment_nonce=" << header.segment_nonce
|
||||
<< ", type=" << header.type
|
||||
<< ", category=" << header.category
|
||||
<< ", generaton=" << (unsigned)header.generation
|
||||
<< ")";
|
||||
}
|
||||
|
||||
|
@ -757,9 +757,9 @@ constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits<objaddr_t>::max();
|
||||
constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX;
|
||||
|
||||
enum class placement_hint_t {
|
||||
HOT = 0, // Most of the metadata
|
||||
COLD, // Object data
|
||||
REWRITE, // Cold metadata and data (probably need further splits)
|
||||
HOT = 0, // The default user hint that expects mutations or retirement
|
||||
COLD, // Expect no mutations and no retirement in the near future
|
||||
REWRITE, // Hint for the internal rewrites
|
||||
NUM_HINTS // Constant for number of hints
|
||||
};
|
||||
|
||||
@ -973,6 +973,37 @@ constexpr bool is_backref_node(extent_types_t type)
|
||||
|
||||
std::ostream &operator<<(std::ostream &out, extent_types_t t);
|
||||
|
||||
/// Reclaim generation of an extent/segment. Together with the data category
/// it selects which writer an extent is placed into.
using reclaim_gen_t = uint8_t;

/// Generation assigned to an extent that is still dirty when it is rewritten.
constexpr reclaim_gen_t DIRTY_GENERATION = 1;
/// Generation used for new extents allocated with placement_hint_t::COLD.
constexpr reclaim_gen_t COLD_GENERATION = 1;
/// Number of generations; valid generations are [0, RECLAIM_GENERATIONS).
constexpr reclaim_gen_t RECLAIM_GENERATIONS = 3;
/// Marker for "no generation" (e.g. an empty segment).
constexpr reclaim_gen_t NULL_GENERATION =
  std::numeric_limits<reclaim_gen_t>::max();

// Callers assert `gen < RECLAIM_GENERATIONS`; make sure the predefined
// generations can never violate that, and that the null marker can never be
// mistaken for a valid generation.
static_assert(DIRTY_GENERATION < RECLAIM_GENERATIONS);
static_assert(COLD_GENERATION < RECLAIM_GENERATIONS);
static_assert(RECLAIM_GENERATIONS < NULL_GENERATION);

/// Thin wrapper that selects the dedicated stream operator below;
/// reclaim_gen_t itself is only an alias of uint8_t.
struct reclaim_gen_printer_t {
  reclaim_gen_t gen;
};

std::ostream &operator<<(std::ostream &out, reclaim_gen_printer_t gen);
|
||||
|
||||
/// Category of the data stored in an extent/segment.
///
/// The underlying value is encoded into segment_header_t, so the numeric
/// values of the real categories are pinned explicitly and must not change.
enum class data_category_t : uint8_t {
  METADATA = 0,
  DATA = 1,
  NUM  // count of real categories; also used as the "unset" marker
};

std::ostream &operator<<(std::ostream &out, data_category_t c);
|
||||
|
||||
/// Maps an extent type to its data category.
///
/// OBJECT_DATA_BLOCK and COLL_BLOCK are classified as DATA; every other
/// extent type is classified as METADATA.
///
/// @param type  extent type to classify
/// @return      the category the extent type belongs to
constexpr data_category_t get_extent_category(extent_types_t type) {
  const bool is_data =
    (type == extent_types_t::OBJECT_DATA_BLOCK) ||
    (type == extent_types_t::COLL_BLOCK);
  return is_data ? data_category_t::DATA : data_category_t::METADATA;
}
|
||||
|
||||
enum class record_commit_type_t : uint8_t {
|
||||
NONE,
|
||||
MODIFY,
|
||||
@ -1419,6 +1450,9 @@ struct segment_header_t {
|
||||
|
||||
segment_type_t type;
|
||||
|
||||
data_category_t category;
|
||||
reclaim_gen_t generation;
|
||||
|
||||
segment_type_t get_type() const {
|
||||
return type;
|
||||
}
|
||||
@ -1431,6 +1465,8 @@ struct segment_header_t {
|
||||
denc(v.alloc_replay_from, p);
|
||||
denc(v.segment_nonce, p);
|
||||
denc(v.type, p);
|
||||
denc(v.category, p);
|
||||
denc(v.generation, p);
|
||||
DENC_FINISH(p);
|
||||
}
|
||||
};
|
||||
|
@ -28,16 +28,14 @@ TransactionManager::TransactionManager(
|
||||
CacheRef _cache,
|
||||
LBAManagerRef _lba_manager,
|
||||
ExtentPlacementManagerRef &&epm,
|
||||
BackrefManagerRef&& backref_manager,
|
||||
tm_make_config_t config)
|
||||
BackrefManagerRef&& backref_manager)
|
||||
: async_cleaner(std::move(_async_cleaner)),
|
||||
cache(std::move(_cache)),
|
||||
lba_manager(std::move(_lba_manager)),
|
||||
journal(std::move(_journal)),
|
||||
epm(std::move(epm)),
|
||||
backref_manager(std::move(backref_manager)),
|
||||
sm_group(*async_cleaner->get_segment_manager_group()),
|
||||
config(config)
|
||||
sm_group(*async_cleaner->get_segment_manager_group())
|
||||
{
|
||||
async_cleaner->set_extent_callback(this);
|
||||
journal->set_write_pipeline(&write_pipeline);
|
||||
@ -473,7 +471,8 @@ TransactionManager::rewrite_logical_extent(
|
||||
t,
|
||||
lextent->get_type(),
|
||||
lextent->get_length(),
|
||||
placement_hint_t::REWRITE)->cast<LogicalCachedExtent>();
|
||||
lextent->get_user_hint(),
|
||||
lextent->get_reclaim_generation())->cast<LogicalCachedExtent>();
|
||||
lextent->get_bptr().copy_out(
|
||||
0,
|
||||
lextent->get_length(),
|
||||
@ -497,7 +496,8 @@ TransactionManager::rewrite_logical_extent(
|
||||
|
||||
TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
|
||||
Transaction &t,
|
||||
CachedExtentRef extent)
|
||||
CachedExtentRef extent,
|
||||
reclaim_gen_t target_generation)
|
||||
{
|
||||
LOG_PREFIX(TransactionManager::rewrite_extent);
|
||||
|
||||
@ -511,6 +511,13 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
|
||||
ceph_assert(!extent->is_pending_io());
|
||||
}
|
||||
|
||||
assert(extent->is_valid() && !extent->is_initial_pending());
|
||||
if (extent->is_dirty()) {
|
||||
extent->set_reclaim_generation(DIRTY_GENERATION);
|
||||
} else {
|
||||
extent->set_reclaim_generation(target_generation);
|
||||
}
|
||||
|
||||
t.get_rewrite_version_stats().increment(extent->get_version());
|
||||
|
||||
if (is_backref_node(extent->get_type())) {
|
||||
@ -640,7 +647,7 @@ TransactionManager::~TransactionManager() {}
|
||||
TransactionManagerRef make_transaction_manager(tm_make_config_t config)
|
||||
{
|
||||
LOG_PREFIX(make_transaction_manager);
|
||||
auto epm = std::make_unique<ExtentPlacementManager>();
|
||||
auto epm = std::make_unique<ExtentPlacementManager>(config.epm_prefer_ool);
|
||||
auto cache = std::make_unique<Cache>(*epm);
|
||||
auto lba_manager = lba_manager::create_lba_manager(*cache);
|
||||
auto sms = std::make_unique<SegmentManagerGroup>();
|
||||
@ -681,8 +688,7 @@ TransactionManagerRef make_transaction_manager(tm_make_config_t config)
|
||||
std::move(cache),
|
||||
std::move(lba_manager),
|
||||
std::move(epm),
|
||||
std::move(backref_manager),
|
||||
config);
|
||||
std::move(backref_manager));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -35,15 +35,16 @@ namespace crimson::os::seastore {
|
||||
class Journal;
|
||||
|
||||
struct tm_make_config_t {
|
||||
bool is_test = true;
|
||||
journal_type_t j_type = journal_type_t::SEGMENT_JOURNAL;
|
||||
placement_hint_t default_placement_hint = placement_hint_t::HOT;
|
||||
bool is_test;
|
||||
journal_type_t j_type;
|
||||
bool epm_prefer_ool;
|
||||
reclaim_gen_t default_generation;
|
||||
|
||||
static tm_make_config_t get_default() {
|
||||
return tm_make_config_t {
|
||||
false,
|
||||
journal_type_t::SEGMENT_JOURNAL,
|
||||
placement_hint_t::HOT
|
||||
false
|
||||
};
|
||||
}
|
||||
static tm_make_config_t get_test_segmented_journal() {
|
||||
@ -52,7 +53,7 @@ struct tm_make_config_t {
|
||||
return tm_make_config_t {
|
||||
true,
|
||||
journal_type_t::SEGMENT_JOURNAL,
|
||||
placement_hint_t::HOT
|
||||
false
|
||||
};
|
||||
}
|
||||
static tm_make_config_t get_test_cb_journal() {
|
||||
@ -61,7 +62,7 @@ struct tm_make_config_t {
|
||||
return tm_make_config_t {
|
||||
true,
|
||||
journal_type_t::CIRCULARBOUNDED_JOURNAL,
|
||||
placement_hint_t::REWRITE
|
||||
true
|
||||
};
|
||||
}
|
||||
|
||||
@ -71,9 +72,9 @@ private:
|
||||
tm_make_config_t(
|
||||
bool is_test,
|
||||
journal_type_t j_type,
|
||||
placement_hint_t default_placement_hint)
|
||||
bool epm_prefer_ool)
|
||||
: is_test(is_test), j_type(j_type),
|
||||
default_placement_hint(default_placement_hint)
|
||||
epm_prefer_ool(epm_prefer_ool)
|
||||
{}
|
||||
};
|
||||
|
||||
@ -114,8 +115,7 @@ public:
|
||||
CacheRef cache,
|
||||
LBAManagerRef lba_manager,
|
||||
ExtentPlacementManagerRef &&epm,
|
||||
BackrefManagerRef&& backref_manager,
|
||||
tm_make_config_t config = tm_make_config_t::get_default());
|
||||
BackrefManagerRef&& backref_manager);
|
||||
|
||||
/// Writes initial metadata to disk
|
||||
using mkfs_ertr = base_ertr;
|
||||
@ -338,14 +338,8 @@ public:
|
||||
alloc_extent_ret<T> alloc_extent(
|
||||
Transaction &t,
|
||||
laddr_t laddr_hint,
|
||||
extent_len_t len) {
|
||||
placement_hint_t placement_hint;
|
||||
if constexpr (T::TYPE == extent_types_t::OBJECT_DATA_BLOCK ||
|
||||
T::TYPE == extent_types_t::COLL_BLOCK) {
|
||||
placement_hint = placement_hint_t::COLD;
|
||||
} else {
|
||||
placement_hint = config.default_placement_hint;
|
||||
}
|
||||
extent_len_t len,
|
||||
placement_hint_t placement_hint = placement_hint_t::HOT) {
|
||||
LOG_PREFIX(TransactionManager::alloc_extent);
|
||||
SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}",
|
||||
t, T::TYPE, len, placement_hint, laddr_hint);
|
||||
@ -353,7 +347,8 @@ public:
|
||||
auto ext = cache->alloc_new_extent<T>(
|
||||
t,
|
||||
len,
|
||||
placement_hint);
|
||||
placement_hint,
|
||||
0);
|
||||
return lba_manager->alloc_extent(
|
||||
t,
|
||||
laddr_hint,
|
||||
@ -447,7 +442,8 @@ public:
|
||||
using AsyncCleaner::ExtentCallbackInterface::rewrite_extent_ret;
|
||||
rewrite_extent_ret rewrite_extent(
|
||||
Transaction &t,
|
||||
CachedExtentRef extent) final;
|
||||
CachedExtentRef extent,
|
||||
reclaim_gen_t target_generation) final;
|
||||
|
||||
using AsyncCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
|
||||
get_extent_if_live_ret get_extent_if_live(
|
||||
@ -608,10 +604,10 @@ private:
|
||||
|
||||
WritePipeline write_pipeline;
|
||||
|
||||
tm_make_config_t config;
|
||||
rewrite_extent_ret rewrite_logical_extent(
|
||||
Transaction& t,
|
||||
LogicalCachedExtentRef extent);
|
||||
|
||||
public:
|
||||
// Testing interfaces
|
||||
auto get_async_cleaner() {
|
||||
|
@ -60,7 +60,9 @@ struct btree_test_base :
|
||||
|
||||
segment_id_t allocate_segment(
|
||||
segment_seq_t seq,
|
||||
segment_type_t type
|
||||
segment_type_t type,
|
||||
data_category_t,
|
||||
reclaim_gen_t
|
||||
) final {
|
||||
auto ret = next;
|
||||
next = segment_id_t{
|
||||
@ -111,7 +113,7 @@ struct btree_test_base :
|
||||
}).safe_then([this] {
|
||||
sms.reset(new SegmentManagerGroup());
|
||||
journal = journal::make_segmented(*this);
|
||||
epm.reset(new ExtentPlacementManager());
|
||||
epm.reset(new ExtentPlacementManager(false));
|
||||
cache.reset(new Cache(*epm));
|
||||
|
||||
block_size = segment_manager->get_block_size();
|
||||
@ -368,7 +370,11 @@ struct btree_lba_manager_test : btree_test_base {
|
||||
test_lba_mappings
|
||||
};
|
||||
if (create_fake_extent) {
|
||||
cache->alloc_new_extent<TestBlockPhysical>(*t.t, TestBlockPhysical::SIZE);
|
||||
cache->alloc_new_extent<TestBlockPhysical>(
|
||||
*t.t,
|
||||
TestBlockPhysical::SIZE,
|
||||
placement_hint_t::HOT,
|
||||
0);
|
||||
};
|
||||
return t;
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ struct cbjournal_test_t : public seastar_test_suite_t
|
||||
|
||||
cbjournal_test_t() :
|
||||
segment_manager(segment_manager::create_test_ephemeral()),
|
||||
epm(new ExtentPlacementManager()),
|
||||
epm(new ExtentPlacementManager(true)),
|
||||
cache(*epm)
|
||||
{
|
||||
device = new nvme_device::TestMemory(CBTEST_DEFAULT_TEST_SIZE);
|
||||
|
@ -88,7 +88,7 @@ struct cache_test_t : public seastar_test_suite_t {
|
||||
return segment_manager->mkfs(
|
||||
segment_manager::get_ephemeral_device_config(0, 1));
|
||||
}).safe_then([this] {
|
||||
epm.reset(new ExtentPlacementManager());
|
||||
epm.reset(new ExtentPlacementManager(false));
|
||||
cache.reset(new Cache(*epm));
|
||||
current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0);
|
||||
epm->add_device(segment_manager.get(), true);
|
||||
@ -131,7 +131,9 @@ TEST_F(cache_test_t, test_addr_fixup)
|
||||
auto t = get_transaction();
|
||||
auto extent = cache->alloc_new_extent<TestBlockPhysical>(
|
||||
*t,
|
||||
TestBlockPhysical::SIZE);
|
||||
TestBlockPhysical::SIZE,
|
||||
placement_hint_t::HOT,
|
||||
0);
|
||||
extent->set_contents('c');
|
||||
csum = extent->get_crc32c();
|
||||
submit_transaction(std::move(t)).get0();
|
||||
@ -160,7 +162,9 @@ TEST_F(cache_test_t, test_dirty_extent)
|
||||
auto t = get_transaction();
|
||||
auto extent = cache->alloc_new_extent<TestBlockPhysical>(
|
||||
*t,
|
||||
TestBlockPhysical::SIZE);
|
||||
TestBlockPhysical::SIZE,
|
||||
placement_hint_t::HOT,
|
||||
0);
|
||||
extent->set_contents('c');
|
||||
csum = extent->get_crc32c();
|
||||
auto reladdr = extent->get_paddr();
|
||||
|
@ -109,7 +109,9 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
|
||||
|
||||
segment_id_t allocate_segment(
|
||||
segment_seq_t seq,
|
||||
segment_type_t type
|
||||
segment_type_t type,
|
||||
data_category_t,
|
||||
reclaim_gen_t
|
||||
) final {
|
||||
auto ret = next;
|
||||
next = segment_id_t{
|
||||
|
Loading…
Reference in New Issue
Block a user