Merge pull request #59212 from cyx1231st/wip-seastore-more-reports

crimson/os/seastore/cache: report lru usage/in/out with trans and extent type

Reviewed-by: Samuel Just <sjust@redhat.com>
Reviewed-by: Xuehan Xu <xuxuehan@qianxin.com>
This commit is contained in:
Yingxin 2024-08-19 10:18:32 +08:00 committed by GitHub
commit 44747ff4c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 551 additions and 176 deletions

View File

@ -44,7 +44,7 @@ Cache::Cache(
"seastore_cache_lru_size"))
{
LOG_PREFIX(Cache::Cache);
INFO("created, lru_size={}", lru.get_capacity());
INFO("created, lru_capacity={}B", lru.get_capacity_bytes());
register_metrics();
segment_providers_by_device_id.resize(DEVICE_ID_MAX, nullptr);
}
@ -97,8 +97,7 @@ Cache::retire_extent_ret Cache::retire_extent_addr(
TRANS_ID_NULL);
DEBUGT("retire {}~{} as placeholder, add extent -- {}",
t, addr, length, *ext);
const auto t_src = t.get_src();
add_extent(ext, &t_src);
add_extent(ext);
}
t.add_to_read_set(ext);
t.add_to_retired_set(ext);
@ -126,8 +125,7 @@ void Cache::retire_absent_extent_addr(
TRANS_ID_NULL);
DEBUGT("retire {}~{} as placeholder, add extent -- {}",
t, addr, length, *ext);
const auto t_src = t.get_src();
add_extent(ext, &t_src);
add_extent(ext);
t.add_to_read_set(ext);
t.add_to_retired_set(ext);
}
@ -511,14 +509,14 @@ void Cache::register_metrics()
sm::make_counter(
"cache_lru_size_bytes",
[this] {
return lru.get_current_contents_bytes();
return lru.get_current_size_bytes();
},
sm::description("total bytes pinned by the lru")
),
sm::make_counter(
"cache_lru_size_extents",
"cache_lru_num_extents",
[this] {
return lru.get_current_contents_extents();
return lru.get_current_num_extents();
},
sm::description("total extents pinned by the lru")
),
@ -727,19 +725,12 @@ void Cache::register_metrics()
);
}
void Cache::add_extent(
CachedExtentRef ref,
const Transaction::src_t* p_src=nullptr)
void Cache::add_extent(CachedExtentRef ref)
{
assert(ref->is_valid());
assert(ref->user_hint == PLACEMENT_HINT_NULL);
assert(ref->rewrite_generation == NULL_GENERATION);
extents.insert(*ref);
if (ref->is_dirty()) {
add_to_dirty(ref);
} else {
touch_extent(*ref, p_src);
}
}
void Cache::mark_dirty(CachedExtentRef ref)
@ -766,14 +757,11 @@ void Cache::add_to_dirty(CachedExtentRef ref)
void Cache::remove_from_dirty(CachedExtentRef ref)
{
if (ref->is_dirty()) {
ceph_assert(ref->primary_ref_list_hook.is_linked());
stats.dirty_bytes -= ref->get_length();
dirty.erase(dirty.s_iterator_to(*ref));
intrusive_ptr_release(&*ref);
} else {
ceph_assert(!ref->primary_ref_list_hook.is_linked());
}
assert(ref->is_dirty());
ceph_assert(ref->primary_ref_list_hook.is_linked());
stats.dirty_bytes -= ref->get_length();
dirty.erase(dirty.s_iterator_to(*ref));
intrusive_ptr_release(&*ref);
}
void Cache::remove_extent(CachedExtentRef ref)
@ -807,7 +795,7 @@ void Cache::commit_replace_extent(
assert(next->version == prev->version + 1);
extents.replace(*next, *prev);
if (prev->get_type() == extent_types_t::ROOT) {
if (is_root_type(prev->get_type())) {
assert(prev->is_stable_clean()
|| prev->primary_ref_list_hook.is_linked());
if (prev->is_dirty()) {
@ -1093,7 +1081,7 @@ CachedExtentRef Cache::duplicate_for_write(
auto [iter, inserted] = i->mutation_pendings.insert(*ret);
ceph_assert(inserted);
t.add_mutated_extent(ret);
if (ret->get_type() == extent_types_t::ROOT) {
if (is_root_type(ret->get_type())) {
t.root = ret->cast<RootBlock>();
} else {
ret->last_committed_crc = i->last_committed_crc;
@ -1187,7 +1175,7 @@ record_t Cache::prepare_record(
assert(i->get_version() > 0);
auto final_crc = i->calc_crc32c();
if (i->get_type() == extent_types_t::ROOT) {
if (is_root_type(i->get_type())) {
SUBTRACET(seastore_t, "writing out root delta {}B -- {}",
t, delta_length, *i);
assert(t.root == i);
@ -1255,8 +1243,8 @@ record_t Cache::prepare_record(
retire_stat.increment(extent->get_length());
DEBUGT("retired and remove extent -- {}", t, *extent);
commit_retire_extent(t, extent);
if (is_backref_mapped_extent_node(extent)
|| is_retired_placeholder(extent->get_type())) {
if (is_backref_mapped_extent_node(extent) ||
is_retired_placeholder_type(extent->get_type())) {
rel_delta.alloc_blk_ranges.emplace_back(
extent->get_paddr(),
L_ADDR_NULL,
@ -1289,7 +1277,7 @@ record_t Cache::prepare_record(
i->prepare_write();
i->prepare_commit();
bl.append(i->get_bptr());
if (i->get_type() == extent_types_t::ROOT) {
if (is_root_type(i->get_type())) {
ceph_assert(0 == "ROOT never gets written as a fresh block");
}
@ -1354,7 +1342,7 @@ record_t Cache::prepare_record(
i->state = CachedExtent::extent_state_t::CLEAN;
assert(i->is_logical());
i->clear_modified_region();
touch_extent(*i);
touch_extent(*i, &trans_src);
DEBUGT("inplace rewrite ool block is commmitted -- {}", t, *i);
}
@ -1576,9 +1564,11 @@ void Cache::complete_commit(
i->prior_instance.reset();
DEBUGT("add extent as fresh, inline={} -- {}",
t, is_inline, *i);
const auto t_src = t.get_src();
i->invalidate_hints();
add_extent(i, &t_src);
add_extent(i);
assert(!i->is_dirty());
const auto t_src = t.get_src();
touch_extent(*i, &t_src);
epm.commit_space_used(i->get_paddr(), i->get_length());
if (is_backref_mapped_extent_node(i)) {
DEBUGT("backref_list new {} len {}",
@ -1619,7 +1609,7 @@ void Cache::complete_commit(
i->prior_instance = CachedExtentRef();
i->state = CachedExtent::extent_state_t::DIRTY;
assert(i->version > 0);
if (i->version == 1 || i->get_type() == extent_types_t::ROOT) {
if (i->version == 1 || is_root_type(i->get_type())) {
i->dirty_from_or_retired_at = start_seq;
DEBUGT("commit extent done, become dirty -- {}", t, *i);
} else {
@ -1648,8 +1638,8 @@ void Cache::complete_commit(
for (auto &i: t.retired_set) {
auto &extent = i.extent;
extent->dirty_from_or_retired_at = start_seq;
if (is_backref_mapped_extent_node(extent)
|| is_retired_placeholder(extent->get_type())) {
if (is_backref_mapped_extent_node(extent) ||
is_retired_placeholder_type(extent->get_type())) {
DEBUGT("backref_list free {} len {}",
t,
extent->get_paddr(),
@ -1694,8 +1684,13 @@ void Cache::complete_commit(
i->get_length(),
i->get_type(),
start_seq));
const auto t_src = t.get_src();
add_extent(i, &t_src);
add_extent(i);
if (i->is_dirty()) {
add_to_dirty(i);
} else {
const auto t_src = t.get_src();
touch_extent(*i, &t_src);
}
}
}
if (!backref_list.empty()) {
@ -1752,8 +1747,8 @@ Cache::close_ertr::future<> Cache::close()
stats.dirty_bytes,
get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL),
get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL),
lru.get_current_contents_extents(),
lru.get_current_contents_bytes(),
lru.get_current_num_extents(),
lru.get_current_size_bytes(),
extents.size(),
extents.get_bytes());
root.reset();
@ -1860,7 +1855,7 @@ Cache::replay_delta(
std::make_pair(false, nullptr));
}
if (delta.type == extent_types_t::ROOT) {
if (is_root_type(delta.type)) {
TRACE("replay root delta at {} {}, remove extent ... -- {}, prv_root={}",
journal_seq, record_base, delta, *root);
remove_extent(root);
@ -1871,6 +1866,7 @@ Cache::replay_delta(
journal_seq, record_base, delta, *root);
root->set_modify_time(modify_time);
add_extent(root);
add_to_dirty(root);
return replay_delta_ertr::make_ready_future<std::pair<bool, CachedExtentRef>>(
std::make_pair(true, root));
} else {
@ -1881,7 +1877,7 @@ Cache::replay_delta(
if (ret) {
// no retired-placeholder should exist yet because no transaction
// has been created.
assert(ret->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
assert(!is_retired_placeholder_type(ret->get_type()));
return ret->wait_io().then([ret] {
return ret;
});
@ -1898,7 +1894,9 @@ Cache::replay_delta(
delta.length,
nullptr,
[](CachedExtent &) {},
[](CachedExtent &) {}) :
[this](CachedExtent &ext) {
touch_extent(ext, nullptr);
}) :
_get_extent_if_cached(
delta.paddr)
).handle_error(
@ -2009,7 +2007,7 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
if (result == Transaction::get_extent_ret::ABSENT) {
DEBUGT("extent is absent on t -- {}", t, *ext);
t.add_to_read_set(ext);
if (ext->get_type() == extent_types_t::ROOT) {
if (is_root_type(ext->get_type())) {
if (t.root) {
assert(&*t.root == &*ext);
ceph_assert(0 == "t.root would have to already be in the read set");
@ -2161,4 +2159,110 @@ Cache::do_get_caching_extent_by_type(
});
}
/// Builds a cache_stats_t snapshot for one report interval.
///
/// The work is delegated to the LRU, which fills in the stats object; the
/// report_detail / seconds arguments are forwarded to it unchanged.
cache_stats_t Cache::get_stats(
  bool report_detail, double seconds) const
{
  cache_stats_t lru_report;
  lru.get_stats(lru_report, report_detail, seconds);
  return lru_report;
}
/// Fills @p stats with the current LRU size and the LRU in/out io accumulated
/// since the previous call, optionally logging a detailed breakdown.
///
/// @param stats         receives lru_sizes (current bytes and number of
///                      extents) and lru_io (overall io minus the snapshot
///                      taken by the previous call).
/// @param report_detail when true and @p seconds is non-zero, an INFO log is
///                      emitted with sizes grouped by data / logical-metadata
///                      / physical extent types and io grouped by transaction
///                      source.
/// @param seconds       interval length handed to the io printers; a value
///                      of 0 suppresses the detailed report.
///
/// The last_* snapshots are refreshed on every call (including calls that do
/// not log), so each call reports deltas relative to the previous one.
void Cache::LRU::get_stats(
  cache_stats_t &stats,
  bool report_detail,
  double seconds) const
{
  LOG_PREFIX(Cache::LRU::get_stats);

  stats.lru_sizes = cache_size_stats_t{current_size, lru.size()};
  // io delta since the last report
  stats.lru_io = overall_io;
  stats.lru_io.minus(last_overall_io);

  if (report_detail && seconds != 0) {
    // io attributed to transactions, and the remainder attributed to "other"
    cache_io_stats_t _trans_io = trans_io;
    _trans_io.minus(last_trans_io);

    cache_io_stats_t other_io = stats.lru_io;
    other_io.minus(_trans_io);

    // work on a copy so the per-(src, ext) counters can be turned into deltas
    // in place, while summing them up per transaction source
    counter_by_src_t<counter_by_extent_t<cache_io_stats_t> >
      _trans_io_by_src_ext = trans_io_by_src_ext;
    counter_by_src_t<cache_io_stats_t> trans_io_by_src;
    for (uint8_t _src=0; _src<TRANSACTION_TYPE_MAX; ++_src) {
      auto src = static_cast<transaction_type_t>(_src);
      auto& io_by_ext = get_by_src(_trans_io_by_src_ext, src);
      const auto& last_io_by_ext = get_by_src(last_trans_io_by_src_ext, src);
      auto& trans_io_per_src = get_by_src(trans_io_by_src, src);
      for (uint8_t _ext=0; _ext<EXTENT_TYPES_MAX; ++_ext) {
        auto ext = static_cast<extent_types_t>(_ext);
        auto& extent_io = get_by_ext(io_by_ext, ext);
        const auto& last_extent_io = get_by_ext(last_io_by_ext, ext);
        extent_io.minus(last_extent_io);
        trans_io_per_src.add(extent_io);
      }
    }

    std::ostringstream oss;
    oss << "\nlru total" << stats.lru_sizes;

    // current sizes grouped by extent category; types matching none of the
    // three predicates are not counted in any group
    cache_size_stats_t data_sizes;
    cache_size_stats_t mdat_sizes;
    cache_size_stats_t phys_sizes;
    for (uint8_t _ext=0; _ext<EXTENT_TYPES_MAX; ++_ext) {
      auto ext = static_cast<extent_types_t>(_ext);
      // bind by reference, consistent with the other lookups (no copy needed)
      const auto& extent_sizes = get_by_ext(sizes_by_ext, ext);
      if (is_data_type(ext)) {
        data_sizes.add(extent_sizes);
      } else if (is_logical_metadata_type(ext)) {
        mdat_sizes.add(extent_sizes);
      } else if (is_physical_type(ext)) {
        phys_sizes.add(extent_sizes);
      }
    }
    oss << "\n data" << data_sizes
        << "\n mdat" << mdat_sizes
        << "\n phys" << phys_sizes;

    oss << "\nlru io: trans-"
        << cache_io_stats_printer_t{seconds, _trans_io}
        << "; other-"
        << cache_io_stats_printer_t{seconds, other_io};

    // per-source io, skipping sources without any activity in this interval
    for (uint8_t _src=0; _src<TRANSACTION_TYPE_MAX; ++_src) {
      auto src = static_cast<transaction_type_t>(_src);
      const auto& trans_io_per_src = get_by_src(trans_io_by_src, src);
      if (trans_io_per_src.is_empty()) {
        continue;
      }
      cache_io_stats_t data_io;
      cache_io_stats_t mdat_io;
      cache_io_stats_t phys_io;
      const auto& io_by_ext = get_by_src(_trans_io_by_src_ext, src);
      for (uint8_t _ext=0; _ext<EXTENT_TYPES_MAX; ++_ext) {
        auto ext = static_cast<extent_types_t>(_ext);
        // bind by reference, consistent with the other lookups (no copy needed)
        const auto& extent_io = get_by_ext(io_by_ext, ext);
        if (is_data_type(ext)) {
          data_io.add(extent_io);
        } else if (is_logical_metadata_type(ext)) {
          mdat_io.add(extent_io);
        } else if (is_physical_type(ext)) {
          phys_io.add(extent_io);
        }
      }
      oss << "\n " << src << ": "
          << cache_io_stats_printer_t{seconds, trans_io_per_src}
          << "\n data: "
          << cache_io_stats_printer_t{seconds, data_io}
          << "\n mdat: "
          << cache_io_stats_printer_t{seconds, mdat_io}
          << "\n phys: "
          << cache_io_stats_printer_t{seconds, phys_io};
    }

    INFO("{}", oss.str());
  }

  // remember the current counters as the baseline for the next report
  last_overall_io = overall_io;
  last_trans_io = trans_io;
  last_trans_io_by_src_ext = trans_io_by_src_ext;
}
}

View File

@ -198,6 +198,8 @@ public:
Cache(ExtentPlacementManager &epm);
~Cache();
cache_stats_t get_stats(bool report_detail, double seconds) const;
/// Creates empty transaction by source
TransactionRef create_transaction(
Transaction::src_t src,
@ -312,7 +314,7 @@ public:
if (!ret) {
SUBDEBUGT(seastore_cache, "{} {} is absent", t, type, offset);
return get_extent_if_cached_iertr::make_ready_future<CachedExtentRef>();
} else if (ret->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
} else if (is_retired_placeholder_type(ret->get_type())) {
// retired_placeholder is not really cached yet
SUBDEBUGT(seastore_cache, "{} {} is absent(placeholder)",
t, type, offset);
@ -327,7 +329,8 @@ public:
SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}",
t, type, offset, *ret);
t.add_to_read_set(ret);
touch_extent(*ret);
const auto t_src = t.get_src();
touch_extent(*ret, &t_src);
return ret->wait_io().then([ret] {
return get_extent_if_cached_iertr::make_ready_future<
CachedExtentRef>(ret);
@ -371,7 +374,6 @@ public:
});
} else {
assert(!ret->is_mutable());
touch_extent(*ret);
SUBDEBUGT(seastore_cache, "{} {}~{} is present on t without been \
fully loaded, reading ... {}", t, T::TYPE, offset, length, *ret);
auto bp = alloc_cache_buf(ret->get_length());
@ -384,7 +386,8 @@ public:
t, T::TYPE, offset, length);
auto f = [&t, this](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
touch_extent(ext);
const auto t_src = t.get_src();
touch_extent(ext, &t_src);
};
auto metric_key = std::make_pair(t.get_src(), T::TYPE);
return trans_intr::make_interruptible(
@ -421,7 +424,8 @@ public:
t, T::TYPE, offset, length);
auto f = [&t, this](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
touch_extent(ext);
const auto t_src = t.get_src();
touch_extent(ext, &t_src);
};
auto metric_key = std::make_pair(t.get_src(), T::TYPE);
return trans_intr::make_interruptible(
@ -495,7 +499,8 @@ public:
// stable from trans-view
assert(!p_extent->is_pending_in_trans(t.get_trans_id()));
if (t.maybe_add_to_read_set(p_extent)) {
touch_extent(*p_extent);
const auto t_src = t.get_src();
touch_extent(*p_extent, &t_src);
}
}
} else {
@ -511,9 +516,8 @@ public:
}
}
assert(p_extent->is_stable() || p_extent->is_exist_clean());
// user should not see RETIRED_PLACEHOLDER extents
ceph_assert(p_extent->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
ceph_assert(!is_retired_placeholder_type(p_extent->get_type()));
if (!p_extent->is_fully_loaded()) {
assert(!p_extent->is_mutable());
LOG_PREFIX(Cache::get_extent_viewable_by_trans);
@ -586,8 +590,8 @@ private:
SUBDEBUG(seastore_cache,
"{} {}~{} is absent, add extent and reading ... -- {}",
T::TYPE, offset, length, *ret);
const auto p_src = p_src_ext ? &p_src_ext->first : nullptr;
add_extent(ret, p_src);
add_extent(ret);
// touch_extent() should be included in on_cache
on_cache(*ret);
extent_init_func(*ret);
return read_extent<T>(
@ -595,7 +599,7 @@ private:
}
// extent PRESENT in cache
if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
if (is_retired_placeholder_type(cached->get_type())) {
auto ret = CachedExtent::make_cached_extent_ref<T>(
alloc_cache_buf(length));
ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
@ -712,7 +716,6 @@ private:
});
} else {
assert(!ret->is_mutable());
touch_extent(*ret);
SUBDEBUGT(seastore_cache, "{} {}~{} {} is present on t without been \
fully loaded, reading ...", t, type, offset, length, laddr);
auto bp = alloc_cache_buf(ret->get_length());
@ -725,7 +728,8 @@ private:
t, type, offset, length, laddr);
auto f = [&t, this](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
touch_extent(ext);
const auto t_src = t.get_src();
touch_extent(ext, &t_src);
};
auto src = t.get_src();
return trans_intr::make_interruptible(
@ -759,7 +763,8 @@ private:
t, type, offset, length, laddr);
auto f = [&t, this](CachedExtent &ext) {
t.add_to_read_set(CachedExtentRef(&ext));
touch_extent(ext);
const auto t_src = t.get_src();
touch_extent(ext, &t_src);
};
auto src = t.get_src();
return trans_intr::make_interruptible(
@ -1352,7 +1357,7 @@ private:
/// Update lru for access to ref
void touch_extent(
CachedExtent &ext,
const Transaction::src_t* p_src=nullptr)
const Transaction::src_t* p_src)
{
if (p_src &&
is_background_transaction(*p_src) &&
@ -1360,7 +1365,7 @@ private:
return;
}
if (ext.is_stable_clean() && !ext.is_placeholder()) {
lru.move_to_top(ext);
lru.move_to_top(ext, p_src);
}
}
@ -1380,7 +1385,7 @@ private:
*
* holds refs to dirty extents. Ordered by CachedExtent::get_dirty_from().
*/
CachedExtent::list dirty;
CachedExtent::primary_ref_list dirty;
using backref_extent_entry_query_set_t =
std::set<
@ -1416,6 +1421,7 @@ private:
friend class crimson::os::seastore::backref::BtreeBackrefManager;
friend class crimson::os::seastore::BackrefManager;
/**
* lru
*
@ -1426,71 +1432,109 @@ private:
const size_t capacity = 0;
// current size (bytes)
size_t contents = 0;
size_t current_size = 0;
CachedExtent::list lru;
counter_by_extent_t<cache_size_stats_t> sizes_by_ext;
cache_io_stats_t overall_io;
cache_io_stats_t trans_io;
counter_by_src_t<counter_by_extent_t<cache_io_stats_t> >
trans_io_by_src_ext;
void trim_to_capacity() {
while (contents > capacity) {
assert(lru.size() > 0);
remove_from_lru(lru.front());
}
}
mutable cache_io_stats_t last_overall_io;
mutable cache_io_stats_t last_trans_io;
mutable counter_by_src_t<counter_by_extent_t<cache_io_stats_t> >
last_trans_io_by_src_ext;
void add_to_lru(CachedExtent &extent) {
CachedExtent::primary_ref_list lru;
void do_remove_from_lru(
CachedExtent &extent,
const Transaction::src_t* p_src) {
assert(extent.is_stable_clean() && !extent.is_placeholder());
if (!extent.primary_ref_list_hook.is_linked()) {
contents += extent.get_length();
intrusive_ptr_add_ref(&extent);
lru.push_back(extent);
assert(extent.primary_ref_list_hook.is_linked());
assert(lru.size() > 0);
auto extent_length = extent.get_length();
assert(current_size >= extent_length);
lru.erase(lru.s_iterator_to(extent));
current_size -= extent_length;
get_by_ext(sizes_by_ext, extent.get_type()).account_out(extent_length);
overall_io.account_out(extent_length);
if (p_src) {
trans_io.account_out(extent_length);
get_by_ext(
get_by_src(trans_io_by_src_ext, *p_src),
extent.get_type()).account_out(extent_length);
}
trim_to_capacity();
intrusive_ptr_release(&extent);
}
public:
LRU(size_t capacity) : capacity(capacity) {}
size_t get_capacity() const {
size_t get_capacity_bytes() const {
return capacity;
}
size_t get_current_contents_bytes() const {
return contents;
size_t get_current_size_bytes() const {
return current_size;
}
size_t get_current_contents_extents() const {
size_t get_current_num_extents() const {
return lru.size();
}
void get_stats(
cache_stats_t &stats,
bool report_detail,
double seconds) const;
void remove_from_lru(CachedExtent &extent) {
assert(extent.is_stable_clean() && !extent.is_placeholder());
if (extent.primary_ref_list_hook.is_linked()) {
lru.erase(lru.s_iterator_to(extent));
assert(contents >= extent.get_length());
contents -= extent.get_length();
intrusive_ptr_release(&extent);
do_remove_from_lru(extent, nullptr);
}
}
void move_to_top(CachedExtent &extent) {
void move_to_top(
CachedExtent &extent,
const Transaction::src_t* p_src) {
assert(extent.is_stable_clean() && !extent.is_placeholder());
auto extent_length = extent.get_length();
if (extent.primary_ref_list_hook.is_linked()) {
lru.erase(lru.s_iterator_to(extent));
intrusive_ptr_release(&extent);
assert(contents >= extent.get_length());
contents -= extent.get_length();
// present, move to top (back)
assert(lru.size() > 0);
assert(current_size >= extent_length);
lru.erase(lru.s_iterator_to(extent));
lru.push_back(extent);
} else {
// absent, add to top (back)
current_size += extent_length;
get_by_ext(sizes_by_ext, extent.get_type()).account_in(extent_length);
overall_io.account_in(extent_length);
if (p_src) {
trans_io.account_in(extent_length);
get_by_ext(
get_by_src(trans_io_by_src_ext, *p_src),
extent.get_type()).account_in(extent_length);
}
intrusive_ptr_add_ref(&extent);
lru.push_back(extent);
// trim to capacity
while (current_size > capacity) {
do_remove_from_lru(lru.front(), p_src);
}
}
add_to_lru(extent);
}
void clear() {
LOG_PREFIX(Cache::LRU::clear);
for (auto iter = lru.begin(); iter != lru.end();) {
SUBDEBUG(seastore_cache, "clearing {}", *iter);
remove_from_lru(*(iter++));
do_remove_from_lru(*(iter++), nullptr);
}
}
@ -1504,9 +1548,6 @@ private:
uint64_t hit = 0;
};
template <typename CounterT>
using counter_by_extent_t = std::array<CounterT, EXTENT_TYPES_MAX>;
struct invalid_trans_efforts_t {
io_stat_t read;
io_stat_t mutate;
@ -1590,15 +1631,6 @@ private:
version_stat_t committed_reclaim_version;
} stats;
template <typename CounterT>
CounterT& get_by_ext(
counter_by_extent_t<CounterT>& counters_by_ext,
extent_types_t ext) {
auto index = static_cast<uint8_t>(ext);
assert(index < EXTENT_TYPES_MAX);
return counters_by_ext[index];
}
void account_conflict(Transaction::src_t src1, Transaction::src_t src2) {
assert(src1 < Transaction::src_t::MAX);
assert(src2 < Transaction::src_t::MAX);
@ -1644,7 +1676,10 @@ private:
const journal_seq_t &);
/// Add extent to extents handling dirty and refcounting
void add_extent(CachedExtentRef ref, const Transaction::src_t* t_src);
///
/// Note, it must be followed by add_to_dirty() or touch_extent().
/// The only exception is RetiredExtentPlaceholder.
void add_extent(CachedExtentRef ref);
/// Mark existing extent ref dirty -- mainly for replay
void mark_dirty(CachedExtentRef ref);
@ -1730,7 +1765,7 @@ private:
iter != extents.end()) {
if (p_metric_key &&
// retired_placeholder is not really cached yet
iter->get_type() != extent_types_t::RETIRED_PLACEHOLDER) {
!is_retired_placeholder_type(iter->get_type())) {
++p_counters->hit;
}
return CachedExtentRef(&*iter);

View File

@ -515,7 +515,7 @@ public:
/// Returns true if extent is a placeholder
bool is_placeholder() const {
return get_type() == extent_types_t::RETIRED_PLACEHOLDER;
return is_retired_placeholder_type(get_type());
}
bool is_pending_io() const {
@ -685,7 +685,7 @@ private:
CachedExtent,
boost::intrusive::list_member_hook<>,
&CachedExtent::primary_ref_list_hook>;
using list = boost::intrusive::list<
using primary_ref_list = boost::intrusive::list<
CachedExtent,
primary_ref_list_member_options>;

View File

@ -277,7 +277,8 @@ void ExtentPlacementManager::set_primary_device(Device *device)
device_stats_t
ExtentPlacementManager::get_device_stats(
const writer_stats_t &journal_stats,
bool report_detail) const
bool report_detail,
double seconds) const
{
LOG_PREFIX(ExtentPlacementManager::get_device_stats);
@ -345,16 +346,7 @@ ExtentPlacementManager::get_device_stats(
cold_stats.add(cold_writer_stats.back());
}
auto now = seastar::lowres_clock::now();
if (last_tp == seastar::lowres_clock::time_point::min()) {
last_tp = now;
return {};
}
std::chrono::duration<double> duration_d = now - last_tp;
double seconds = duration_d.count();
last_tp = now;
if (report_detail) {
if (report_detail && seconds != 0) {
std::ostringstream oss;
auto report_writer_stats = [seconds, &oss](
const char* name,

View File

@ -177,7 +177,7 @@ public:
return false;
}
assert(t.get_src() == transaction_type_t::TRIM_DIRTY);
ceph_assert_always(extent->get_type() == extent_types_t::ROOT ||
ceph_assert_always(is_root_type(extent->get_type()) ||
extent->get_paddr().is_absolute());
return crimson::os::seastore::can_inplace_rewrite(extent->get_type());
}
@ -303,7 +303,8 @@ public:
device_stats_t get_device_stats(
const writer_stats_t &journal_stats,
bool report_detail) const;
bool report_detail,
double seconds) const;
using mount_ertr = crimson::errorator<
crimson::ct_error::input_output_error>;
@ -571,7 +572,8 @@ private:
extent_types_t type,
placement_hint_t hint,
rewrite_gen_t gen) {
if (type == extent_types_t::ROOT) {
assert(is_real_type(type));
if (is_root_type(type)) {
gen = INLINE_GENERATION;
} else if (get_main_backend_type() == backend_type_t::SEGMENTED &&
is_lba_backref_node(type)) {
@ -1097,9 +1099,6 @@ private:
SegmentSeqAllocatorRef ool_segment_seq_allocator;
extent_len_t max_data_allocation_size = 0;
mutable seastar::lowres_clock::time_point last_tp =
seastar::lowres_clock::time_point::min();
friend class ::transaction_manager_test_t;
};

View File

@ -590,16 +590,21 @@ seastar::future<> SeaStore::report_stats()
ceph_assert(seastar::this_shard_id() == primary_core);
shard_device_stats.resize(seastar::smp::count);
shard_io_stats.resize(seastar::smp::count);
shard_cache_stats.resize(seastar::smp::count);
return shard_stores.invoke_on_all([this](const Shard &local_store) {
bool report_detail = false;
double seconds = 0;
if (seastar::this_shard_id() == 0) {
// avoid too verbose logs, only report detail in a particular shard
report_detail = true;
seconds = local_store.reset_report_interval();
}
shard_device_stats[seastar::this_shard_id()] =
local_store.get_device_stats(report_detail);
local_store.get_device_stats(report_detail, seconds);
shard_io_stats[seastar::this_shard_id()] =
local_store.get_io_stats(report_detail);
local_store.get_io_stats(report_detail, seconds);
shard_cache_stats[seastar::this_shard_id()] =
local_store.get_cache_stats(report_detail, seconds);
}).then([this] {
LOG_PREFIX(SeaStore);
auto now = seastar::lowres_clock::now();
@ -695,6 +700,24 @@ seastar::future<> SeaStore::report_stats()
<< ") ";
}
INFO("details: {}", oss_pending.str());
cache_stats_t cache_total = {};
for (const auto& s : shard_cache_stats) {
cache_total.add(s);
}
cache_size_stats_t lru_sizes_ps = cache_total.lru_sizes;
lru_sizes_ps.size /= seastar::smp::count;
lru_sizes_ps.num_extents /= seastar::smp::count;
cache_io_stats_t lru_io_ps = cache_total.lru_io;
lru_io_ps.in_size /= seastar::smp::count;
lru_io_ps.in_num_extents /= seastar::smp::count;
lru_io_ps.out_size /= seastar::smp::count;
lru_io_ps.out_num_extents /= seastar::smp::count;
INFO("cache lru: total{} {}; per-shard: total{} {}",
cache_total.lru_sizes,
cache_io_stats_printer_t{seconds, cache_total.lru_io},
lru_sizes_ps,
cache_io_stats_printer_t{seconds, lru_io_ps});
return seastar::now();
});
}
@ -2530,27 +2553,33 @@ void SeaStore::Shard::init_managers()
*transaction_manager);
}
device_stats_t SeaStore::Shard::get_device_stats(bool report_detail) const
{
return transaction_manager->get_device_stats(report_detail);
}
shard_stats_t SeaStore::Shard::get_io_stats(bool report_detail) const
double SeaStore::Shard::reset_report_interval() const
{
double seconds;
auto now = seastar::lowres_clock::now();
if (last_tp == seastar::lowres_clock::time_point::min()) {
last_tp = now;
last_shard_stats = shard_stats;
return {};
seconds = 0;
} else {
std::chrono::duration<double> duration_d = now - last_tp;
seconds = duration_d.count();
}
std::chrono::duration<double> duration_d = now - last_tp;
double seconds = duration_d.count();
last_tp = now;
return seconds;
}
/// Returns this shard's device stats by forwarding both arguments to the
/// transaction manager.
device_stats_t SeaStore::Shard::get_device_stats(
  bool report_detail, double seconds) const
{
  device_stats_t device_report =
    transaction_manager->get_device_stats(report_detail, seconds);
  return device_report;
}
shard_stats_t SeaStore::Shard::get_io_stats(
bool report_detail, double seconds) const
{
shard_stats_t ret = shard_stats;
ret.minus(last_shard_stats);
last_shard_stats = shard_stats;
if (report_detail) {
if (report_detail && seconds != 0) {
LOG_PREFIX(SeaStore::get_io_stats);
auto calc_conflicts = [](uint64_t ios, uint64_t repeats) {
return (double)(repeats-ios)/ios;
@ -2586,9 +2615,18 @@ shard_stats_t SeaStore::Shard::get_io_stats(bool report_detail) const
ret.pending_bg_num,
ret.pending_flush_num);
}
last_shard_stats = shard_stats;
return ret;
}
/// Returns this shard's cache stats by forwarding both arguments to the
/// transaction manager.
cache_stats_t SeaStore::Shard::get_cache_stats(
  bool report_detail, double seconds) const
{
  cache_stats_t cache_report =
    transaction_manager->get_cache_stats(report_detail, seconds);
  return cache_report;
}
std::unique_ptr<SeaStore> make_seastore(
const std::string &device)
{

View File

@ -204,9 +204,13 @@ public:
void init_managers();
device_stats_t get_device_stats(bool report_detail) const;
double reset_report_interval() const;
shard_stats_t get_io_stats(bool report_detail) const;
device_stats_t get_device_stats(bool report_detail, double seconds) const;
shard_stats_t get_io_stats(bool report_detail, double seconds) const;
cache_stats_t get_cache_stats(bool report_detail, double seconds) const;
private:
struct internal_context_t {
@ -583,6 +587,7 @@ private:
seastar::lowres_clock::time_point::min();
mutable std::vector<device_stats_t> shard_device_stats;
mutable std::vector<shard_stats_t> shard_io_stats;
mutable std::vector<cache_stats_t> shard_cache_stats;
};
std::unique_ptr<SeaStore> make_seastore(

View File

@ -245,6 +245,10 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
return out << "OBJECT_DATA_BLOCK";
case extent_types_t::RETIRED_PLACEHOLDER:
return out << "RETIRED_PLACEHOLDER";
case extent_types_t::ALLOC_INFO:
return out << "ALLOC_INFO";
case extent_types_t::JOURNAL_TAIL:
return out << "JOURNAL_TAIL";
case extent_types_t::TEST_BLOCK:
return out << "TEST_BLOCK";
case extent_types_t::TEST_BLOCK_PHYSICAL:
@ -256,7 +260,7 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
case extent_types_t::NONE:
return out << "NONE";
default:
return out << "UNKNOWN";
return out << "UNKNOWN(" << (unsigned)t << ")";
}
}
@ -290,7 +294,7 @@ std::ostream &operator<<(std::ostream &out, data_category_t c)
}
bool can_inplace_rewrite(extent_types_t type) {
return get_extent_category(type) == data_category_t::DATA;
return is_data_type(type);
}
std::ostream &operator<<(std::ostream &out, sea_time_point_printer_t tp)
@ -976,4 +980,36 @@ std::ostream& operator<<(std::ostream& out, const writer_stats_printer_t& p)
return out;
}
/// Streams the in/out io of a cache_io_stats_printer_t as
/// "in(<x>MiB/s,<y>KiB,<z>ps) out(<x>MiB/s,<y>KiB,<z>ps)".
///
/// Every number is rendered with two decimals; the "ps" figures are the
/// extent counts divided by p.seconds.
std::ostream& operator<<(std::ostream& out, const cache_io_stats_printer_t& p)
{
  // all numeric fields share the same two-decimal rendering
  auto two_decimals = [](double value) {
    return fmt::format("{:.2f}", value);
  };
  const auto& io = p.stats;

  out << "in(";
  out << two_decimals(io.get_in_mbs(p.seconds)) << "MiB/s,";
  out << two_decimals(io.get_in_avg_kb()) << "KiB,";
  out << two_decimals(io.in_num_extents/p.seconds) << "ps) out(";
  out << two_decimals(io.get_out_mbs(p.seconds)) << "MiB/s,";
  out << two_decimals(io.get_out_avg_kb()) << "KiB,";
  out << two_decimals(io.out_num_extents/p.seconds) << "ps)";
  return out;
}
/// Streams a cache_size_stats_t as "(<x>MiB,<y>KiB,<num_extents>)", with the
/// two size figures rendered with two decimals.
std::ostream& operator<<(std::ostream& out, const cache_size_stats_t& p)
{
  auto two_decimals = [](double value) {
    return fmt::format("{:.2f}", value);
  };

  out << "(";
  out << two_decimals(p.get_mb()) << "MiB,";
  out << two_decimals(p.get_avg_kb()) << "KiB,";
  out << p.num_extents << ")";
  return out;
}
} // namespace crimson::os::seastore

View File

@ -1197,42 +1197,83 @@ constexpr size_t BACKREF_NODE_SIZE = 4096;
std::ostream &operator<<(std::ostream &out, extent_types_t t);
/// True for the extent types treated as data: OBJECT_DATA_BLOCK and
/// TEST_BLOCK.
constexpr bool is_data_type(extent_types_t type) {
  switch (type) {
  case extent_types_t::OBJECT_DATA_BLOCK:
  case extent_types_t::TEST_BLOCK:
    return true;
  default:
    return false;
  }
}
/// True when type lies in the inclusive enum range
/// [OMAP_INNER, COLL_BLOCK].
constexpr bool is_logical_metadata_type(extent_types_t type) {
  const bool below_first = type < extent_types_t::OMAP_INNER;
  const bool above_last = type > extent_types_t::COLL_BLOCK;
  return !(below_first || above_last);
}
constexpr bool is_logical_type(extent_types_t type) {
switch (type) {
case extent_types_t::ROOT:
case extent_types_t::LADDR_INTERNAL:
case extent_types_t::LADDR_LEAF:
case extent_types_t::BACKREF_INTERNAL:
case extent_types_t::BACKREF_LEAF:
return false;
default:
if ((type >= extent_types_t::OMAP_INNER &&
type <= extent_types_t::OBJECT_DATA_BLOCK) ||
type == extent_types_t::TEST_BLOCK) {
assert(is_logical_metadata_type(type) ||
is_data_type(type));
return true;
} else {
assert(!is_logical_metadata_type(type) &&
!is_data_type(type));
return false;
}
}
constexpr bool is_retired_placeholder(extent_types_t type)
{
constexpr bool is_retired_placeholder_type(extent_types_t type) {
return type == extent_types_t::RETIRED_PLACEHOLDER;
}
constexpr bool is_lba_node(extent_types_t type)
{
/// True only for extent_types_t::ROOT.
constexpr bool is_root_type(extent_types_t type) {
  return !(type != extent_types_t::ROOT);
}
constexpr bool is_lba_node(extent_types_t type) {
return type == extent_types_t::LADDR_INTERNAL ||
type == extent_types_t::LADDR_LEAF ||
type == extent_types_t::DINK_LADDR_LEAF;
type == extent_types_t::LADDR_LEAF ||
type == extent_types_t::DINK_LADDR_LEAF;
}
constexpr bool is_backref_node(extent_types_t type)
{
constexpr bool is_backref_node(extent_types_t type) {
return type == extent_types_t::BACKREF_INTERNAL ||
type == extent_types_t::BACKREF_LEAF;
type == extent_types_t::BACKREF_LEAF;
}
constexpr bool is_lba_backref_node(extent_types_t type)
{
constexpr bool is_lba_backref_node(extent_types_t type) {
return is_lba_node(type) || is_backref_node(type);
}
/// True when type falls into one of the two enum ranges used for physical
/// extents: everything up to DINK_LADDR_LEAF, or
/// [TEST_BLOCK_PHYSICAL, BACKREF_LEAF].
///
/// In debug builds the range check is cross-checked against the explicit
/// list (root, lba/backref nodes, TEST_BLOCK_PHYSICAL) in both directions.
constexpr bool is_physical_type(extent_types_t type) {
  const bool in_physical_range =
    type <= extent_types_t::DINK_LADDR_LEAF ||
    (type >= extent_types_t::TEST_BLOCK_PHYSICAL &&
     type <= extent_types_t::BACKREF_LEAF);
  // the range check and the explicit list must always agree
  assert(in_physical_range ==
         (is_root_type(type) ||
          is_lba_backref_node(type) ||
          type == extent_types_t::TEST_BLOCK_PHYSICAL));
  return in_physical_range;
}
/// Returns true when `type` is a "real" extent type, decided by enumerator
/// order: everything up to and including OBJECT_DATA_BLOCK, plus the
/// inclusive range [TEST_BLOCK, BACKREF_LEAF].
///
/// The asserts tie the result to is_logical_type() / is_physical_type():
/// a real type must be one of the two, and a non-real type must be neither.
constexpr bool is_real_type(extent_types_t type) {
  const bool in_leading_range =
    type <= extent_types_t::OBJECT_DATA_BLOCK;
  const bool in_trailing_range =
    type >= extent_types_t::TEST_BLOCK &&
    type <= extent_types_t::BACKREF_LEAF;

  if (!(in_leading_range || in_trailing_range)) {
    // Not real: must be neither logical nor physical.
    assert(!is_logical_type(type) &&
           !is_physical_type(type));
    return false;
  }

  // Real: must be logical or physical.
  assert(is_logical_type(type) ||
         is_physical_type(type));
  return true;
}
std::ostream &operator<<(std::ostream &out, extent_types_t t);
/**
@ -1304,8 +1345,7 @@ enum class data_category_t : uint8_t {
std::ostream &operator<<(std::ostream &out, data_category_t c);
constexpr data_category_t get_extent_category(extent_types_t type) {
if (type == extent_types_t::OBJECT_DATA_BLOCK ||
type == extent_types_t::TEST_BLOCK) {
if (is_data_type(type)) {
return data_category_t::DATA;
} else {
return data_category_t::METADATA;
@ -2290,6 +2330,27 @@ void minus_srcs(counter_by_src_t<CounterT>& base,
}
}
/// Fixed-size table with one CounterT slot per extent type
/// (EXTENT_TYPES_MAX slots in total).
template <typename CounterT>
using counter_by_extent_t = std::array<CounterT, EXTENT_TYPES_MAX>;

/// Returns a mutable reference to the slot of `counters_by_ext` that
/// belongs to extent type `ext`. The slot position is the numeric value of
/// `ext`, which is asserted to be below EXTENT_TYPES_MAX.
template <typename CounterT>
CounterT& get_by_ext(
  counter_by_extent_t<CounterT>& counters_by_ext,
  extent_types_t ext)
{
  const uint8_t slot = static_cast<uint8_t>(ext);
  assert(slot < EXTENT_TYPES_MAX);
  return counters_by_ext[slot];
}
/// Read-only overload: returns a const reference to the slot of
/// `counters_by_ext` that belongs to extent type `ext`. The slot position
/// is the numeric value of `ext`, asserted to be below EXTENT_TYPES_MAX.
template <typename CounterT>
const CounterT& get_by_ext(
  const counter_by_extent_t<CounterT>& counters_by_ext,
  extent_types_t ext)
{
  const uint8_t slot = static_cast<uint8_t>(ext);
  assert(slot < EXTENT_TYPES_MAX);
  return counters_by_ext[slot];
}
struct grouped_io_stats {
uint64_t num_io = 0;
uint64_t num_io_grouped = 0;
@ -2496,6 +2557,103 @@ struct shard_stats_t {
}
};
/// Accumulated byte and extent counts for traffic in two directions,
/// "in" and "out". All counters start at zero.
struct cache_io_stats_t {
  uint64_t in_size = 0;          ///< total bytes accounted via account_in()
  uint64_t in_num_extents = 0;   ///< number of account_in() calls
  uint64_t out_size = 0;         ///< total bytes accounted via account_out()
  uint64_t out_num_extents = 0;  ///< number of account_out() calls

  /// True when no extent has been accounted in either direction.
  bool is_empty() const {
    return in_num_extents == 0 && out_num_extents == 0;
  }

  /// Inbound rate in MiB per second over `seconds`.
  /// The byte count is truncated to whole 4 KiB units before scaling
  /// (>>12, then /256). `seconds` is expected to be positive; it is not
  /// checked here.
  double get_in_mbs(double seconds) const {
    return (in_size>>12)/(seconds*256);
  }

  /// Average inbound extent size in KiB (byte count truncated to whole
  /// KiB). Returns 0 when nothing was accounted, instead of the NaN a
  /// 0/0 floating-point division would produce.
  double get_in_avg_kb() const {
    if (in_num_extents == 0) {
      return 0.0;
    }
    return (in_size>>10)/static_cast<double>(in_num_extents);
  }

  /// Outbound rate in MiB per second over `seconds`; same scaling and
  /// precondition as get_in_mbs().
  double get_out_mbs(double seconds) const {
    return (out_size>>12)/(seconds*256);
  }

  /// Average outbound extent size in KiB (byte count truncated to whole
  /// KiB). Returns 0 when nothing was accounted, instead of NaN.
  double get_out_avg_kb() const {
    if (out_num_extents == 0) {
      return 0.0;
    }
    return (out_size>>10)/static_cast<double>(out_num_extents);
  }

  /// Records one inbound extent of `size` bytes.
  void account_in(extent_len_t size) {
    in_size += size;
    ++in_num_extents;
  }

  /// Records one outbound extent of `size` bytes.
  void account_out(extent_len_t size) {
    out_size += size;
    ++out_num_extents;
  }

  /// Subtracts every counter of `o` from this object. The counters are
  /// unsigned, so a field of `o` larger than the matching field here wraps
  /// around rather than going negative.
  void minus(const cache_io_stats_t& o) {
    in_size -= o.in_size;
    in_num_extents -= o.in_num_extents;
    out_size -= o.out_size;
    out_num_extents -= o.out_num_extents;
  }

  /// Adds every counter of `o` to this object.
  void add(const cache_io_stats_t& o) {
    in_size += o.in_size;
    in_num_extents += o.in_num_extents;
    out_size += o.out_size;
    out_num_extents += o.out_num_extents;
  }
};
/// Pairs a cache_io_stats_t with a duration so that the matching
/// operator<< can print it. Holds the stats by reference, so the referenced
/// object must outlive the printer.
struct cache_io_stats_printer_t {
  // presumably the length of the interval the stats cover, used to derive
  // per-second rates -- confirm against the operator<< implementation
  double seconds;
  const cache_io_stats_t& stats;
};
std::ostream& operator<<(std::ostream&, const cache_io_stats_printer_t&);
/// Current occupancy expressed as a byte total and an extent count.
struct cache_size_stats_t {
  uint64_t size = 0;         ///< total bytes currently accounted
  uint64_t num_extents = 0;  ///< number of extents currently accounted

  /// Total size in MiB. The byte count is truncated to whole 4 KiB units
  /// before scaling (>>12, then /256).
  double get_mb() const {
    return (size>>12)/static_cast<double>(256);
  }

  /// Average extent size in KiB (byte count truncated to whole KiB).
  /// Returns 0 when no extent is accounted, instead of the NaN a 0/0
  /// floating-point division would produce.
  double get_avg_kb() const {
    if (num_extents == 0) {
      return 0.0;
    }
    return (size>>10)/static_cast<double>(num_extents);
  }

  /// Accounts one more extent of `sz` bytes.
  void account_in(extent_len_t sz) {
    size += sz;
    ++num_extents;
  }

  /// Removes one extent of `sz` bytes. Asserts that the totals are large
  /// enough, since the unsigned counters would otherwise wrap around.
  void account_out(extent_len_t sz) {
    assert(size >= sz);
    assert(num_extents > 0);
    size -= sz;
    --num_extents;
  }

  /// Adds the totals of `o` to this object.
  void add(const cache_size_stats_t& o) {
    size += o.size;
    num_extents += o.num_extents;
  }
};
std::ostream& operator<<(std::ostream&, const cache_size_stats_t&);
/// Bundle of the lru statistics: the current size totals and the in/out
/// traffic counters.
struct cache_stats_t {
  cache_size_stats_t lru_sizes;
  cache_io_stats_t lru_io;

  /// Merges `o` into this object by adding each member's counters.
  void add(const cache_stats_t& o) {
    lru_io.add(o.lru_io);
    lru_sizes.add(o.lru_sizes);
  }
};
}
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::seastore_meta_t)
@ -2513,6 +2671,8 @@ WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::alloc_delta_t)
WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::segment_tail_t)
#if FMT_VERSION >= 90000
template <> struct fmt::formatter<crimson::os::seastore::cache_io_stats_printer_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::cache_size_stats_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::data_category_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::delta_info_t> : fmt::ostream_formatter {};
template <> struct fmt::formatter<crimson::os::seastore::device_id_printer_t> : fmt::ostream_formatter {};

View File

@ -101,7 +101,7 @@ public:
iter != read_set.end()) {
// placeholder in read-set should be in the retired-set
// at the same time.
assert(iter->ref->get_type() != extent_types_t::RETIRED_PLACEHOLDER);
assert(!is_retired_placeholder_type(iter->ref->get_type()));
if (out)
*out = iter->ref;
SUBTRACET(seastore_cache, "{} is present in read_set -- {}",
@ -270,9 +270,9 @@ public:
void replace_placeholder(CachedExtent& placeholder, CachedExtent& extent) {
ceph_assert(!is_weak());
assert(placeholder.get_type() == extent_types_t::RETIRED_PLACEHOLDER);
assert(extent.get_type() != extent_types_t::RETIRED_PLACEHOLDER);
assert(extent.get_type() != extent_types_t::ROOT);
assert(is_retired_placeholder_type(placeholder.get_type()));
assert(!is_retired_placeholder_type(extent.get_type()));
assert(!is_root_type(extent.get_type()));
assert(extent.get_paddr() == placeholder.get_paddr());
{
auto where = read_set.find(placeholder.get_paddr());

View File

@ -658,7 +658,7 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
return backref_manager->rewrite_extent(t, extent);
}
if (extent->get_type() == extent_types_t::ROOT) {
if (is_root_type(extent->get_type())) {
DEBUGT("rewriting root extent -- {}", t, *extent);
cache->duplicate_for_write(t, extent);
return rewrite_extent_iertr::now();

View File

@ -80,9 +80,14 @@ public:
using close_ertr = base_ertr;
close_ertr::future<> close();
device_stats_t get_device_stats(bool report_detail) const {
device_stats_t get_device_stats(
bool report_detail, double seconds) const {
writer_stats_t journal_stats = journal->get_writer_stats();
return epm->get_device_stats(journal_stats, report_detail);
return epm->get_device_stats(journal_stats, report_detail, seconds);
}
cache_stats_t get_cache_stats(bool report_detail, double seconds) const {
return cache->get_stats(report_detail, seconds);
}
/// Resets transaction

View File

@ -978,14 +978,14 @@ struct transaction_manager_test_t :
extent_types_t::ONODE_BLOCK_STAGED,
extent_types_t::COLL_BLOCK,
extent_types_t::OBJECT_DATA_BLOCK,
extent_types_t::RETIRED_PLACEHOLDER,
extent_types_t::ALLOC_INFO,
extent_types_t::JOURNAL_TAIL,
extent_types_t::TEST_BLOCK,
extent_types_t::TEST_BLOCK_PHYSICAL,
extent_types_t::BACKREF_INTERNAL,
extent_types_t::BACKREF_LEAF
};
// exclude DINK_LADDR_LEAF, RETIRED_PLACEHOLDER,
// ALLOC_INFO, JOURNAL_TAIL
assert(all_extent_types.size() == EXTENT_TYPES_MAX - 4);
std::vector<rewrite_gen_t> all_generations;
for (auto i = INIT_GENERATION; i < REWRITE_GENERATIONS; i++) {
@ -998,8 +998,9 @@ struct transaction_manager_test_t :
// this loop should be consistent with EPM::adjust_generation
for (auto t : all_extent_types) {
assert(is_real_type(t));
expected_generations[t] = {};
if (!is_logical_type(t)) {
if (is_root_type(t) || is_lba_backref_node(t)) {
for (auto gen : all_generations) {
expected_generations[t][gen] = INLINE_GENERATION;
}
@ -1018,7 +1019,7 @@ struct transaction_manager_test_t :
auto update_data_gen_mapping = [&](std::function<rewrite_gen_t(rewrite_gen_t)> func) {
for (auto t : all_extent_types) {
if (!is_logical_type(t)) {
if (is_root_type(t) || is_lba_backref_node(t)) {
continue;
}
for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) {