mirror of
https://github.com/ceph/ceph
synced 2025-02-21 01:47:25 +00:00
crimson/os/seastore: make onode data/metadata laddr space reservation configurable
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
This commit is contained in:
parent
53d8f0855c
commit
d2235ba3b9
@ -65,3 +65,13 @@ options:
|
||||
level: dev
|
||||
desc: The record fullness threshold to flush a journal batch
|
||||
default: 0.95
|
||||
# Size of the logical-address (laddr) region reserved for each object's data.
# SeaStore hands this value to ObjectDataHandler as max_object_size, and
# FLTreeOnodeManager also uses it as the offset of the object's metadata region.
# NOTE(review): the option is read with get_conf<uint64_t> but stored in
# uint32_t members, so a value that does not fit in 32 bits would be narrowed.
- name: seastore_default_max_object_size
|
||||
type: uint
|
||||
level: dev
|
||||
desc: default logical address space reservation for seastore objects' data
|
||||
default: 16777216
|
||||
# Width of the laddr range used for each object's metadata: FLTreeOnodeManager
# passes it to every onode it builds, and Onode::get_metadata_hint() takes
# rand() modulo this value when choosing a metadata hint.
# NOTE(review): get_metadata_hint() asserts the range is non-zero, so 0 is
# presumably not a valid setting; the value is also stored in a uint32_t
# although it is read with get_conf<uint64_t> — confirm the intended bounds.
- name: seastore_default_object_metadata_reservation
|
||||
type: uint
|
||||
level: dev
|
||||
desc: default logical address space reservation for seastore objects' metadata
|
||||
default: 16777216
|
||||
|
@ -15,16 +15,6 @@ namespace {
|
||||
}
|
||||
|
||||
namespace crimson::os::seastore {
|
||||
|
||||
/**
|
||||
* MAX_OBJECT_SIZE
|
||||
*
|
||||
* For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE
|
||||
* for any object. In the future, once we have the ability to remap logical
|
||||
* mappings (necessary for clone), we'll add the ability to grow and shrink
|
||||
* these regions and remove this assumption.
|
||||
*/
|
||||
static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION;
|
||||
#define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
|
||||
|
||||
using context_t = ObjectDataHandler::context_t;
|
||||
@ -261,9 +251,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
|
||||
extent_len_t size)
|
||||
{
|
||||
LOG_PREFIX(ObjectDataHandler::prepare_data_reservation);
|
||||
ceph_assert(size <= MAX_OBJECT_SIZE);
|
||||
ceph_assert(size <= max_object_size);
|
||||
if (!object_data.is_null()) {
|
||||
ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
|
||||
ceph_assert(object_data.get_reserved_data_len() == max_object_size);
|
||||
DEBUGT("reservation present: {}~{}",
|
||||
ctx.t,
|
||||
object_data.get_reserved_data_base(),
|
||||
@ -273,13 +263,13 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
|
||||
DEBUGT("reserving: {}~{}",
|
||||
ctx.t,
|
||||
ctx.onode.get_data_hint(),
|
||||
MAX_OBJECT_SIZE);
|
||||
max_object_size);
|
||||
return ctx.tm.reserve_region(
|
||||
ctx.t,
|
||||
ctx.onode.get_data_hint(),
|
||||
MAX_OBJECT_SIZE
|
||||
).si_then([&object_data](auto pin) {
|
||||
ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
|
||||
max_object_size
|
||||
).si_then([max_object_size=max_object_size, &object_data](auto pin) {
|
||||
ceph_assert(pin->get_length() == max_object_size);
|
||||
object_data.update_reserved(
|
||||
pin->get_laddr(),
|
||||
pin->get_length());
|
||||
|
@ -50,6 +50,8 @@ class ObjectDataHandler {
|
||||
public:
|
||||
using base_iertr = TransactionManager::base_iertr;
|
||||
|
||||
ObjectDataHandler(uint32_t mos) : max_object_size(mos) {}
|
||||
|
||||
struct context_t {
|
||||
TransactionManager &tm;
|
||||
Transaction &t;
|
||||
@ -104,6 +106,16 @@ private:
|
||||
context_t ctx,
|
||||
object_data_t &object_data,
|
||||
extent_len_t size);
|
||||
private:
|
||||
/**
|
||||
* max_object_size
|
||||
*
|
||||
* For now, we allocate a fixed region of laddr space of size max_object_size
|
||||
* for any object. In the future, once we have the ability to remap logical
|
||||
* mappings (necessary for clone), we'll add the ability to grow and shrink
|
||||
* these regions and remove this assumption.
|
||||
*/
|
||||
const uint32_t max_object_size = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -54,19 +54,23 @@ class Onode : public boost::intrusive_ref_counter<
|
||||
{
|
||||
protected:
|
||||
virtual laddr_t get_hint() const = 0;
|
||||
const uint32_t default_metadata_offset = 0;
|
||||
const uint32_t default_metadata_range = 0;
|
||||
public:
|
||||
static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20;
|
||||
static constexpr uint32_t DEFAULT_METADATA_OFFSET =
|
||||
DEFAULT_DATA_RESERVATION;
|
||||
static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20;
|
||||
// Construct an Onode from the two reservation sizes.
//   ddr: stored as default_metadata_offset, the offset that
//        get_metadata_hint() adds to get_hint().
//   dmr: stored as default_metadata_range, the modulus that
//        get_metadata_hint() applies to its random component.
// NOTE(review): "ddr" presumably stands for the default data reservation
// (FLTreeOnodeManager passes default_data_reservation here), i.e. the metadata
// region is placed right after the data reservation — confirm.
Onode(uint32_t ddr, uint32_t dmr)
|
||||
: default_metadata_offset(ddr),
|
||||
default_metadata_range(dmr)
|
||||
{}
|
||||
|
||||
virtual const onode_layout_t &get_layout() const = 0;
|
||||
virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
|
||||
virtual ~Onode() = default;
|
||||
|
||||
laddr_t get_metadata_hint() const {
|
||||
return get_hint() + DEFAULT_METADATA_OFFSET +
|
||||
((uint32_t)std::rand() % DEFAULT_METADATA_RANGE);
|
||||
assert(default_metadata_offset);
|
||||
assert(default_metadata_range);
|
||||
return get_hint() + default_metadata_offset +
|
||||
((uint32_t)std::rand() % default_metadata_range);
|
||||
}
|
||||
laddr_t get_data_hint() const {
|
||||
return get_hint();
|
||||
|
@ -27,7 +27,10 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode(
|
||||
DEBUGT("no entry for {}", trans, hoid);
|
||||
return crimson::ct_error::enoent::make();
|
||||
}
|
||||
auto val = OnodeRef(new FLTreeOnode(cursor.value()));
|
||||
auto val = OnodeRef(new FLTreeOnode(
|
||||
default_data_reservation,
|
||||
default_metadata_range,
|
||||
cursor.value()));
|
||||
return get_onode_iertr::make_ready_future<OnodeRef>(
|
||||
val
|
||||
);
|
||||
@ -43,10 +46,13 @@ FLTreeOnodeManager::get_or_create_onode(
|
||||
return tree.insert(
|
||||
trans, hoid,
|
||||
OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
|
||||
).si_then([&trans, &hoid, FNAME](auto p)
|
||||
).si_then([this, &trans, &hoid, FNAME](auto p)
|
||||
-> get_or_create_onode_ret {
|
||||
auto [cursor, created] = std::move(p);
|
||||
auto val = OnodeRef(new FLTreeOnode(cursor.value()));
|
||||
auto val = OnodeRef(new FLTreeOnode(
|
||||
default_data_reservation,
|
||||
default_metadata_range,
|
||||
cursor.value()));
|
||||
if (created) {
|
||||
DEBUGT("created onode for entry for {}", trans, hoid);
|
||||
val->get_mutable_layout(trans) = onode_layout_t{};
|
||||
|
@ -37,7 +37,14 @@ struct FLTreeOnode final : Onode, Value {
|
||||
FLTreeOnode& operator=(const FLTreeOnode&) = delete;
|
||||
|
||||
template <typename... T>
|
||||
FLTreeOnode(T&&... args) : Value(std::forward<T>(args)...) {}
|
||||
FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args)
|
||||
: Onode(ddr, dmr),
|
||||
Value(std::forward<T>(args)...) {}
|
||||
|
||||
// Variadic constructor that takes no reservation sizes: every argument is
// forwarded to Value, and the Onode base is built as Onode(0, 0).
// NOTE(review): Onode::get_metadata_hint() asserts that both values are
// non-zero and takes a modulus by the range, so an instance built through this
// overload presumably must never be asked for a metadata hint — confirm.
template <typename... T>
|
||||
FLTreeOnode(T&&... args)
|
||||
: Onode(0, 0),
|
||||
Value(std::forward<T>(args)...) {}
|
||||
|
||||
struct Recorder : public ValueDeltaRecorder {
|
||||
Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {}
|
||||
@ -102,12 +109,23 @@ struct FLTreeOnode final : Onode, Value {
|
||||
|
||||
using OnodeTree = Btree<FLTreeOnode>;
|
||||
|
||||
using crimson::common::get_conf;
|
||||
|
||||
class FLTreeOnodeManager : public crimson::os::seastore::OnodeManager {
|
||||
OnodeTree tree;
|
||||
|
||||
uint32_t default_data_reservation = 0;
|
||||
uint32_t default_metadata_offset = 0;
|
||||
uint32_t default_metadata_range = 0;
|
||||
public:
|
||||
FLTreeOnodeManager(TransactionManager &tm) :
|
||||
tree(NodeExtentManager::create_seastore(tm)) {}
|
||||
tree(NodeExtentManager::create_seastore(tm)),
|
||||
default_data_reservation(
|
||||
get_conf<uint64_t>("seastore_default_max_object_size")),
|
||||
default_metadata_offset(default_data_reservation),
|
||||
default_metadata_range(
|
||||
get_conf<uint64_t>("seastore_default_object_metadata_reservation"))
|
||||
{}
|
||||
|
||||
mkfs_ret mkfs(Transaction &t) {
|
||||
return tree.mkfs(t);
|
||||
|
@ -68,6 +68,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
using crimson::common::get_conf;
|
||||
|
||||
SeaStore::SeaStore(
|
||||
const std::string& root,
|
||||
MDStoreRef mdstore,
|
||||
@ -80,7 +82,9 @@ SeaStore::SeaStore(
|
||||
segment_manager(std::move(sm)),
|
||||
transaction_manager(std::move(tm)),
|
||||
collection_manager(std::move(cm)),
|
||||
onode_manager(std::move(om))
|
||||
onode_manager(std::move(om)),
|
||||
max_object_size(
|
||||
get_conf<uint64_t>("seastore_default_max_object_size"))
|
||||
{
|
||||
register_metrics();
|
||||
}
|
||||
@ -459,7 +463,7 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::read(
|
||||
size - offset :
|
||||
std::min(size - offset, len);
|
||||
|
||||
return ObjectDataHandler().read(
|
||||
return ObjectDataHandler(max_object_size).read(
|
||||
ObjectDataHandler::context_t{
|
||||
*transaction_manager,
|
||||
t,
|
||||
@ -1069,7 +1073,7 @@ SeaStore::tm_ret SeaStore::_write(
|
||||
return seastar::do_with(
|
||||
std::move(_bl),
|
||||
[=, &ctx, &onode](auto &bl) {
|
||||
return ObjectDataHandler().write(
|
||||
return ObjectDataHandler(max_object_size).write(
|
||||
ObjectDataHandler::context_t{
|
||||
*transaction_manager,
|
||||
*ctx.transaction,
|
||||
@ -1199,7 +1203,7 @@ SeaStore::tm_ret SeaStore::_truncate(
|
||||
LOG_PREFIX(SeaStore::_truncate);
|
||||
DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
|
||||
onode->get_mutable_layout(*ctx.transaction).size = size;
|
||||
return ObjectDataHandler().truncate(
|
||||
return ObjectDataHandler(max_object_size).truncate(
|
||||
ObjectDataHandler::context_t{
|
||||
*transaction_manager,
|
||||
*ctx.transaction,
|
||||
|
@ -305,6 +305,7 @@ private:
|
||||
TransactionManagerRef transaction_manager;
|
||||
CollectionManagerRef collection_manager;
|
||||
OnodeManagerRef onode_manager;
|
||||
const uint32_t max_object_size = 0;
|
||||
|
||||
using tm_iertr = TransactionManager::base_iertr;
|
||||
using tm_ret = tm_iertr::future<>;
|
||||
|
@ -11,6 +11,10 @@ using namespace crimson;
|
||||
using namespace crimson::os;
|
||||
using namespace crimson::os::seastore;
|
||||
|
||||
// Reservation sizes used by this test, each 16 << 20 (16777216).
// Typed constants rather than macros: they respect scope and carry the same
// unsigned 32-bit type as the constructor parameters they are passed to.
constexpr uint32_t MAX_OBJECT_SIZE = 16u << 20;
constexpr uint32_t DEFAULT_OBJECT_DATA_RESERVATION = 16u << 20;
constexpr uint32_t DEFAULT_OBJECT_METADATA_RESERVATION = 16u << 20;
|
||||
|
||||
namespace {
|
||||
[[maybe_unused]] seastar::logger& logger() {
|
||||
return crimson::get_logger(ceph_subsys_test);
|
||||
@ -22,6 +26,7 @@ class TestOnode final : public Onode {
|
||||
bool dirty = false;
|
||||
|
||||
public:
|
||||
// Test onode: hands both reservation sizes straight through to the Onode base.
TestOnode(uint32_t ddr, uint32_t dmr)
  : Onode(ddr, dmr)
{}
|
||||
const onode_layout_t &get_layout() const final {
|
||||
return layout;
|
||||
}
|
||||
@ -58,7 +63,7 @@ struct object_data_handler_test_t:
|
||||
offset,
|
||||
len));
|
||||
with_trans_intr(t, [&](auto &t) {
|
||||
return ObjectDataHandler().write(
|
||||
return ObjectDataHandler(MAX_OBJECT_SIZE).write(
|
||||
ObjectDataHandler::context_t{
|
||||
*tm,
|
||||
t,
|
||||
@ -81,7 +86,7 @@ struct object_data_handler_test_t:
|
||||
0,
|
||||
size - offset);
|
||||
with_trans_intr(t, [&](auto &t) {
|
||||
return ObjectDataHandler().truncate(
|
||||
return ObjectDataHandler(MAX_OBJECT_SIZE).truncate(
|
||||
ObjectDataHandler::context_t{
|
||||
*tm,
|
||||
t,
|
||||
@ -100,7 +105,7 @@ struct object_data_handler_test_t:
|
||||
|
||||
void read(Transaction &t, objaddr_t offset, extent_len_t len) {
|
||||
bufferlist bl = with_trans_intr(t, [&](auto &t) {
|
||||
return ObjectDataHandler().read(
|
||||
return ObjectDataHandler(MAX_OBJECT_SIZE).read(
|
||||
ObjectDataHandler::context_t{
|
||||
*tm,
|
||||
t,
|
||||
@ -132,7 +137,9 @@ struct object_data_handler_test_t:
|
||||
}
|
||||
|
||||
seastar::future<> set_up_fut() final {
|
||||
onode = new TestOnode{};
|
||||
onode = new TestOnode(
|
||||
DEFAULT_OBJECT_DATA_RESERVATION,
|
||||
DEFAULT_OBJECT_METADATA_RESERVATION);
|
||||
known_contents = buffer::create(4<<20 /* 4MB */);
|
||||
memset(known_contents.c_str(), 0, known_contents.length());
|
||||
size = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user