crimson/os/seastore: make onode data/metadata laddr space reservation configurable

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
This commit is contained in:
Xuehan Xu 2021-12-12 16:43:30 +08:00
parent 53d8f0855c
commit d2235ba3b9
9 changed files with 87 additions and 35 deletions

View File

@ -65,3 +65,13 @@ options:
level: dev
desc: The record fullness threshold to flush a journal batch
default: 0.95
- name: seastore_default_max_object_size
type: uint
level: dev
desc: Default size of the logical address space reserved for each seastore object's data
default: 16777216
- name: seastore_default_object_metadata_reservation
type: uint
level: dev
desc: Default size of the logical address space reserved for each seastore object's metadata
default: 16777216

View File

@ -15,16 +15,6 @@ namespace {
}
namespace crimson::os::seastore {
/**
* MAX_OBJECT_SIZE
*
* For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE
* for any object. In the future, once we have the ability to remap logical
* mappings (necessary for clone), we'll add the ability to grow and shrink
* these regions and remove this assumption.
*/
static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION;
#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
using context_t = ObjectDataHandler::context_t;
@ -261,9 +251,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
extent_len_t size)
{
LOG_PREFIX(ObjectDataHandler::prepare_data_reservation);
ceph_assert(size <= MAX_OBJECT_SIZE);
ceph_assert(size <= max_object_size);
if (!object_data.is_null()) {
ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
ceph_assert(object_data.get_reserved_data_len() == max_object_size);
DEBUGT("reservation present: {}~{}",
ctx.t,
object_data.get_reserved_data_base(),
@ -273,13 +263,13 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
DEBUGT("reserving: {}~{}",
ctx.t,
ctx.onode.get_data_hint(),
MAX_OBJECT_SIZE);
max_object_size);
return ctx.tm.reserve_region(
ctx.t,
ctx.onode.get_data_hint(),
MAX_OBJECT_SIZE
).si_then([&object_data](auto pin) {
ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
max_object_size
).si_then([max_object_size=max_object_size, &object_data](auto pin) {
ceph_assert(pin->get_length() == max_object_size);
object_data.update_reserved(
pin->get_laddr(),
pin->get_length());

View File

@ -50,6 +50,8 @@ class ObjectDataHandler {
public:
using base_iertr = TransactionManager::base_iertr;
ObjectDataHandler(uint32_t mos) : max_object_size(mos) {}
struct context_t {
TransactionManager &tm;
Transaction &t;
@ -104,6 +106,16 @@ private:
context_t ctx,
object_data_t &object_data,
extent_len_t size);
private:
/**
* max_object_size
*
* For now, we allocate a fixed region of laddr space of size max_object_size
* for any object. In the future, once we have the ability to remap logical
* mappings (necessary for clone), we'll add the ability to grow and shrink
* these regions and remove this assumption.
*/
const uint32_t max_object_size = 0;
};
}

View File

@ -54,19 +54,23 @@ class Onode : public boost::intrusive_ref_counter<
{
protected:
virtual laddr_t get_hint() const = 0;
const uint32_t default_metadata_offset = 0;
const uint32_t default_metadata_range = 0;
public:
static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20;
static constexpr uint32_t DEFAULT_METADATA_OFFSET =
DEFAULT_DATA_RESERVATION;
static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20;
// Stores the parameters of the laddr window used for metadata hints:
// `ddr` becomes the offset added to get_hint(), and `dmr` becomes the
// width of the range from which get_metadata_hint() picks a random
// position. Callers in this change pass the default data reservation as
// `ddr`, so metadata hints start after the data region.
Onode(uint32_t ddr, uint32_t dmr)
: default_metadata_offset{ddr},
default_metadata_range{dmr}
{
}
virtual const onode_layout_t &get_layout() const = 0;
virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
virtual ~Onode() = default;
laddr_t get_metadata_hint() const {
return get_hint() + DEFAULT_METADATA_OFFSET +
((uint32_t)std::rand() % DEFAULT_METADATA_RANGE);
assert(default_metadata_offset);
assert(default_metadata_range);
return get_hint() + default_metadata_offset +
((uint32_t)std::rand() % default_metadata_range);
}
laddr_t get_data_hint() const {
return get_hint();

View File

@ -27,7 +27,10 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode(
DEBUGT("no entry for {}", trans, hoid);
return crimson::ct_error::enoent::make();
}
auto val = OnodeRef(new FLTreeOnode(cursor.value()));
auto val = OnodeRef(new FLTreeOnode(
default_data_reservation,
default_metadata_range,
cursor.value()));
return get_onode_iertr::make_ready_future<OnodeRef>(
val
);
@ -43,10 +46,13 @@ FLTreeOnodeManager::get_or_create_onode(
return tree.insert(
trans, hoid,
OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
).si_then([&trans, &hoid, FNAME](auto p)
).si_then([this, &trans, &hoid, FNAME](auto p)
-> get_or_create_onode_ret {
auto [cursor, created] = std::move(p);
auto val = OnodeRef(new FLTreeOnode(cursor.value()));
auto val = OnodeRef(new FLTreeOnode(
default_data_reservation,
default_metadata_range,
cursor.value()));
if (created) {
DEBUGT("created onode for entry for {}", trans, hoid);
val->get_mutable_layout(trans) = onode_layout_t{};

View File

@ -37,7 +37,14 @@ struct FLTreeOnode final : Onode, Value {
FLTreeOnode& operator=(const FLTreeOnode&) = delete;
template <typename... T>
FLTreeOnode(T&&... args) : Value(std::forward<T>(args)...) {}
FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args)
: Onode(ddr, dmr),
Value(std::forward<T>(args)...) {}
template <typename... T>
FLTreeOnode(T&&... args)
: Onode(0, 0),
Value(std::forward<T>(args)...) {}
struct Recorder : public ValueDeltaRecorder {
Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {}
@ -102,12 +109,23 @@ struct FLTreeOnode final : Onode, Value {
using OnodeTree = Btree<FLTreeOnode>;
using crimson::common::get_conf;
class FLTreeOnodeManager : public crimson::os::seastore::OnodeManager {
OnodeTree tree;
uint32_t default_data_reservation = 0;
uint32_t default_metadata_offset = 0;
uint32_t default_metadata_range = 0;
public:
FLTreeOnodeManager(TransactionManager &tm) :
tree(NodeExtentManager::create_seastore(tm)) {}
tree(NodeExtentManager::create_seastore(tm)),
default_data_reservation(
get_conf<uint64_t>("seastore_default_max_object_size")),
default_metadata_offset(default_data_reservation),
default_metadata_range(
get_conf<uint64_t>("seastore_default_object_metadata_reservation"))
{}
mkfs_ret mkfs(Transaction &t) {
return tree.mkfs(t);

View File

@ -68,6 +68,8 @@ public:
}
};
using crimson::common::get_conf;
SeaStore::SeaStore(
const std::string& root,
MDStoreRef mdstore,
@ -80,7 +82,9 @@ SeaStore::SeaStore(
segment_manager(std::move(sm)),
transaction_manager(std::move(tm)),
collection_manager(std::move(cm)),
onode_manager(std::move(om))
onode_manager(std::move(om)),
max_object_size(
get_conf<uint64_t>("seastore_default_max_object_size"))
{
register_metrics();
}
@ -459,7 +463,7 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::read(
size - offset :
std::min(size - offset, len);
return ObjectDataHandler().read(
return ObjectDataHandler(max_object_size).read(
ObjectDataHandler::context_t{
*transaction_manager,
t,
@ -1069,7 +1073,7 @@ SeaStore::tm_ret SeaStore::_write(
return seastar::do_with(
std::move(_bl),
[=, &ctx, &onode](auto &bl) {
return ObjectDataHandler().write(
return ObjectDataHandler(max_object_size).write(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
@ -1199,7 +1203,7 @@ SeaStore::tm_ret SeaStore::_truncate(
LOG_PREFIX(SeaStore::_truncate);
DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
onode->get_mutable_layout(*ctx.transaction).size = size;
return ObjectDataHandler().truncate(
return ObjectDataHandler(max_object_size).truncate(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,

View File

@ -305,6 +305,7 @@ private:
TransactionManagerRef transaction_manager;
CollectionManagerRef collection_manager;
OnodeManagerRef onode_manager;
const uint32_t max_object_size = 0;
using tm_iertr = TransactionManager::base_iertr;
using tm_ret = tm_iertr::future<>;

View File

@ -11,6 +11,10 @@ using namespace crimson;
using namespace crimson::os;
using namespace crimson::os::seastore;
// Sizes handed to ObjectDataHandler and TestOnode in these tests; each is
// 16 << 20 (16777216). Typed constants replace the former object-like
// macros so they obey scoping and carry the uint32_t type that the
// constructors taking them expect.
constexpr uint32_t MAX_OBJECT_SIZE = 16u << 20;
constexpr uint32_t DEFAULT_OBJECT_DATA_RESERVATION = 16u << 20;
constexpr uint32_t DEFAULT_OBJECT_METADATA_RESERVATION = 16u << 20;
namespace {
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_test);
@ -22,6 +26,7 @@ class TestOnode final : public Onode {
bool dirty = false;
public:
// Test onode: passes both reservation values through unchanged to the
// Onode base constructor.
TestOnode(uint32_t ddr, uint32_t dmr)
: Onode(ddr, dmr)
{
}
const onode_layout_t &get_layout() const final {
return layout;
}
@ -58,7 +63,7 @@ struct object_data_handler_test_t:
offset,
len));
with_trans_intr(t, [&](auto &t) {
return ObjectDataHandler().write(
return ObjectDataHandler(MAX_OBJECT_SIZE).write(
ObjectDataHandler::context_t{
*tm,
t,
@ -81,7 +86,7 @@ struct object_data_handler_test_t:
0,
size - offset);
with_trans_intr(t, [&](auto &t) {
return ObjectDataHandler().truncate(
return ObjectDataHandler(MAX_OBJECT_SIZE).truncate(
ObjectDataHandler::context_t{
*tm,
t,
@ -100,7 +105,7 @@ struct object_data_handler_test_t:
void read(Transaction &t, objaddr_t offset, extent_len_t len) {
bufferlist bl = with_trans_intr(t, [&](auto &t) {
return ObjectDataHandler().read(
return ObjectDataHandler(MAX_OBJECT_SIZE).read(
ObjectDataHandler::context_t{
*tm,
t,
@ -132,7 +137,9 @@ struct object_data_handler_test_t:
}
seastar::future<> set_up_fut() final {
onode = new TestOnode{};
onode = new TestOnode(
DEFAULT_OBJECT_DATA_RESERVATION,
DEFAULT_OBJECT_METADATA_RESERVATION);
known_contents = buffer::create(4<<20 /* 4MB */);
memset(known_contents.c_str(), 0, known_contents.length());
size = 0;