crimson/onode-staged-tree: implement size upper-bounds to ns/oid

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
This commit is contained in:
Yingxin Cheng 2021-06-02 12:47:55 +08:00
parent 431d69ebd6
commit 6fb8807678
11 changed files with 94 additions and 42 deletions

View File

@ -4,7 +4,6 @@
#include "crimson/os/seastore/logging.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h"
namespace crimson::os::seastore::onode {
@ -41,10 +40,6 @@ FLTreeOnodeManager::get_or_create_onode(
const ghobject_t &hoid)
{
LOG_PREFIX(FLTreeOnodeManager::get_or_create_onode);
if (hoid.hobj.oid.name.length() + hoid.hobj.nspace.length()
> key_view_t::MAX_NS_OID_LENGTH) {
return crimson::ct_error::value_too_large::make();
}
return tree.insert(
trans, hoid,
OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}

View File

@ -11,7 +11,9 @@ namespace crimson::os::seastore::onode {
struct FLTreeOnode final : Onode, Value {
static constexpr tree_conf_t TREE_CONF = {
value_magic_t::ONODE
value_magic_t::ONODE,
128, // max_ns_size
320 // max_oid_size
};
enum class status_t {

View File

@ -64,6 +64,8 @@ using node_offset_t = uint16_t;
constexpr node_offset_t DISK_BLOCK_SIZE = 1u << 12;
constexpr node_offset_t NODE_BLOCK_SIZE = DISK_BLOCK_SIZE * 1u;
using string_size_t = uint16_t;
enum class MatchKindBS : int8_t { NE = -1, EQ = 0 };
enum class MatchKindCMP : int8_t { LT = -1, EQ = 0, GT };

View File

@ -144,8 +144,6 @@ inline MatchKindCMP compare_to(const snap_gen_t& l, const snap_gen_t& r) {
*/
struct string_key_view_t {
enum class Type {MIN, STR, MAX};
// presumably the maximum string length is 2KiB
using string_size_t = uint16_t;
static constexpr auto MARKER_MAX = std::numeric_limits<string_size_t>::max();
static constexpr auto MARKER_MIN = std::numeric_limits<string_size_t>::max() - 1;
static constexpr auto VALID_UPPER_BOUND = std::numeric_limits<string_size_t>::max() - 2;
@ -273,7 +271,6 @@ struct string_key_view_t {
*/
class string_view_masked_t {
public:
using string_size_t = string_key_view_t::string_size_t;
using Type = string_key_view_t::Type;
explicit string_view_masked_t(const string_key_view_t& index)
: type{index.type()} {
@ -392,7 +389,6 @@ inline std::ostream& operator<<(std::ostream& os, const string_view_masked_t& ma
}
struct ns_oid_view_t {
using string_size_t = string_key_view_t::string_size_t;
using Type = string_key_view_t::Type;
ns_oid_view_t(const char* p_end) : nspace(p_end), oid(nspace.p_next_end()) {}
@ -596,13 +592,6 @@ inline std::ostream& operator<<(std::ostream& os, const key_hobj_t& key) {
*/
class key_view_t {
public:
//FIXME: the length of ns and oid should be defined by osd_max_object_name_len
// and object_max_object_namespace_len in the future
static constexpr int MAX_NS_OID_LENGTH =
(4096 - sizeof(onode_layout_t) * 2) / 4
- sizeof(shard_pool_t) - sizeof(crush_t) - sizeof(snap_gen_t)
- 8; // size of length field of oid and ns
/**
* common interfaces as a full_key_t
*/

View File

@ -7,6 +7,7 @@
#include "common/hobject.h"
#include "crimson/common/type_helpers.h"
#include "crimson/os/seastore/logging.h"
#include "fwd.h"
#include "node.h"
@ -79,8 +80,13 @@ class Btree {
// XXX: return key_view_t to avoid unecessary ghobject_t constructions
ghobject_t get_ghobj() const {
assert(!is_end());
return p_cursor->get_key_view(
p_tree->value_builder.get_header_magic()).to_ghobj();
auto view = p_cursor->get_key_view(
p_tree->value_builder.get_header_magic());
assert(view.nspace().size() <=
p_tree->value_builder.get_max_ns_size());
assert(view.oid().size() <=
p_tree->value_builder.get_max_oid_size());
return view.to_ghobj();
}
ValueImpl value() {
@ -236,8 +242,20 @@ class Btree {
struct tree_value_config_t {
value_size_t payload_size = 256;
};
eagain_future<std::pair<Cursor, bool>>
using insert_ertr = eagain_ertr::extend<
crimson::ct_error::value_too_large>;
insert_ertr::future<std::pair<Cursor, bool>>
insert(Transaction& t, const ghobject_t& obj, tree_value_config_t _vconf) {
if (obj.hobj.nspace.size() > value_builder.get_max_ns_size()) {
ERRORT("namespace size {} too large to insert {}",
t, obj.hobj.nspace.size(), key_hobj_t{obj});
return crimson::ct_error::value_too_large::make();
}
if (obj.hobj.oid.name.size() > value_builder.get_max_oid_size()) {
ERRORT("oid size {} too large to insert {}",
t, obj.hobj.oid.name.size(), key_hobj_t{obj});
return crimson::ct_error::value_too_large::make();
}
value_config_t vconf{value_builder.get_header_magic(), _vconf.payload_size};
return seastar::do_with(
full_key_t<KeyT::HOBJ>(obj),

View File

@ -323,7 +323,12 @@ class TreeBuilder {
#else
return eagain_ertr::make_ready_future<BtreeCursor>(cursor);
#endif
});
}).handle_error(
[] (const crimson::ct_error::value_too_large& e) {
ceph_abort("impossible path");
},
crimson::ct_error::pass_further_all{}
);
}
eagain_future<> insert(Transaction& t) {

View File

@ -5,6 +5,7 @@
#include "node.h"
#include "node_delta_recorder.h"
#include "node_layout.h"
// value implementations
#include "test/crimson/seastore/onode_tree/test_value.h"
@ -88,8 +89,11 @@ build_value_recorder_by_type(ceph::bufferlist& encoded,
case value_magic_t::ONODE:
ret = std::make_unique<FLTreeOnode::Recorder>(encoded);
break;
case value_magic_t::TEST:
ret = std::make_unique<TestValue::Recorder>(encoded);
case value_magic_t::TEST_UNBOUND:
ret = std::make_unique<UnboundedValue::Recorder>(encoded);
break;
case value_magic_t::TEST_BOUNDED:
ret = std::make_unique<BoundedValue::Recorder>(encoded);
break;
default:
ret = nullptr;
@ -99,4 +103,12 @@ build_value_recorder_by_type(ceph::bufferlist& encoded,
return ret;
}
void validate_tree_config(const tree_conf_t& conf)
{
ceph_assert(conf.max_ns_size <
string_key_view_t::VALID_UPPER_BOUND);
ceph_assert(conf.max_oid_size <
string_key_view_t::VALID_UPPER_BOUND);
}
}

View File

@ -17,14 +17,17 @@ namespace crimson::os::seastore::onode {
using value_size_t = uint16_t;
enum class value_magic_t : uint8_t {
ONODE = 0x52,
TEST,
TEST_UNBOUND,
TEST_BOUNDED,
};
inline std::ostream& operator<<(std::ostream& os, const value_magic_t& magic) {
switch (magic) {
case value_magic_t::ONODE:
return os << "ONODE";
case value_magic_t::TEST:
return os << "TEST";
case value_magic_t::TEST_UNBOUND:
return os << "TEST_UNBOUND";
case value_magic_t::TEST_BOUNDED:
return os << "TEST_BOUNDED";
default:
return os << "UNKNOWN(" << magic << ")";
}
@ -154,6 +157,8 @@ class ValueDeltaRecorder {
*/
struct tree_conf_t {
value_magic_t value_magic;
string_size_t max_ns_size;
string_size_t max_oid_size;
};
class tree_cursor_t;
@ -249,6 +254,8 @@ class Value {
*/
struct ValueBuilder {
virtual value_magic_t get_header_magic() const = 0;
virtual string_size_t get_max_ns_size() const = 0;
virtual string_size_t get_max_oid_size() const = 0;
virtual std::unique_ptr<ValueDeltaRecorder>
build_value_recorder(ceph::bufferlist&) const = 0;
};
@ -260,9 +267,19 @@ struct ValueBuilder {
*/
template <typename ValueImpl>
struct ValueBuilderImpl final : public ValueBuilder {
ValueBuilderImpl() {
validate_tree_config(ValueImpl::TREE_CONF);
}
value_magic_t get_header_magic() const {
return ValueImpl::TREE_CONF.value_magic;
}
string_size_t get_max_ns_size() const override {
return ValueImpl::TREE_CONF.max_ns_size;
}
string_size_t get_max_oid_size() const override {
return ValueImpl::TREE_CONF.max_oid_size;
}
std::unique_ptr<ValueDeltaRecorder>
build_value_recorder(ceph::bufferlist& encoded) const override {
@ -280,6 +297,8 @@ struct ValueBuilderImpl final : public ValueBuilder {
}
};
void validate_tree_config(const tree_conf_t& conf);
/**
* Get the value recorder by type (the magic value) when the ValueBuilder is
* unavailable.

View File

@ -26,6 +26,8 @@ namespace {
constexpr bool IS_DUMMY_SYNC = false;
using DummyManager = DummyNodeExtentManager<IS_DUMMY_SYNC>;
using UnboundedBtree = Btree<UnboundedValue>;
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_test);
}
@ -140,7 +142,7 @@ TEST_F(a_basic_test_t, 2_node_sizes)
run_async([this] {
auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
auto t = make_test_transaction();
ValueBuilderImpl<TestValue> vb;
ValueBuilderImpl<UnboundedValue> vb;
context_t c{*nm, vb, *t};
std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = {
InternalNode0::allocate(c, false, 1u).unsafe_get0().make_pair(),
@ -171,15 +173,13 @@ TEST_F(a_basic_test_t, 2_node_sizes)
});
}
using TestBtree = Btree<TestValue>;
struct b_dummy_tree_test_t : public seastar_test_suite_t {
NodeExtentManagerURef moved_nm;
TransactionRef ref_t;
Transaction& t;
ValueBuilderImpl<TestValue> vb;
ValueBuilderImpl<UnboundedValue> vb;
context_t c;
TestBtree tree;
UnboundedBtree tree;
b_dummy_tree_test_t()
: moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)},
@ -209,7 +209,7 @@ TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node)
ASSERT_TRUE(tree.last(t).unsafe_get0().is_end());
std::map<ghobject_t,
std::tuple<test_item_t, TestBtree::Cursor>> insert_history;
std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
auto f_validate_insert_new = [this, &insert_history] (
const ghobject_t& key, const test_item_t& value) {
@ -496,7 +496,7 @@ class TestTree {
// clone
auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
auto p_dummy = static_cast<DummyManager*>(ref_dummy.get());
TestBtree tree_clone(std::move(ref_dummy));
UnboundedBtree tree_clone(std::move(ref_dummy));
auto ref_t_clone = make_test_transaction();
Transaction& t_clone = *ref_t_clone;
tree_clone.test_clone_from(t_clone, t, tree).unsafe_get0();
@ -577,12 +577,12 @@ class TestTree {
NodeExtentManagerURef moved_nm;
TransactionRef ref_t;
Transaction& t;
ValueBuilderImpl<TestValue> vb;
ValueBuilderImpl<UnboundedValue> vb;
context_t c;
TestBtree tree;
UnboundedBtree tree;
Values<test_item_t> values;
std::map<ghobject_t,
std::tuple<test_item_t, TestBtree::Cursor>> insert_history;
std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
};
struct c_dummy_test_t : public seastar_test_suite_t {};
@ -1239,9 +1239,9 @@ class DummyChildPool {
Transaction& t() const { return *ref_t; }
std::set<Ref<DummyChild>> tracked_children;
std::optional<TestBtree> p_btree;
std::optional<UnboundedBtree> p_btree;
DummyManager* p_dummy = nullptr;
ValueBuilderImpl<TestValue> vb;
ValueBuilderImpl<UnboundedValue> vb;
TransactionRef ref_t = make_test_transaction();
std::random_device rd;
@ -1526,7 +1526,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
auto moved_nm = (TEST_SEASTORE ? NodeExtentManager::create_seastore(*tm)
: NodeExtentManager::create_dummy(IS_DUMMY_SYNC));
auto p_nm = moved_nm.get();
auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, TestValue>>(
auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>(
kvs, std::move(moved_nm));
{
auto t = tm->create_transaction();
@ -1623,7 +1623,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
auto moved_nm = NodeExtentManager::create_seastore(
*tm, L_ADDR_MIN, EAGAIN_PROBABILITY);
auto p_nm = static_cast<SeastoreNodeExtentManager<true>*>(moved_nm.get());
auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, TestValue>>(
auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>(
kvs, std::move(moved_nm));
unsigned num_ops = 0;
unsigned num_ops_eagain = 0;

View File

@ -36,10 +36,15 @@ inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) {
return os << "TestItem(#" << item.id << ", " << item.size << "B)";
}
template <value_magic_t MAGIC,
string_size_t MAX_NS_SIZE,
string_size_t MAX_OID_SIZE>
class TestValue final : public Value {
public:
static constexpr tree_conf_t TREE_CONF = {
value_magic_t::TEST
MAGIC,
MAX_NS_SIZE,
MAX_OID_SIZE
};
using id_t = test_item_t::id_t;
@ -189,4 +194,9 @@ class TestValue final : public Value {
}
};
using UnboundedValue = TestValue<
value_magic_t::TEST_UNBOUND, 4096, 4096>;
using BoundedValue = TestValue<
value_magic_t::TEST_BOUNDED, 320, 320>;
}

View File

@ -30,7 +30,7 @@ class PerfTree : public TMTestState {
seastar::future<> run(KVPool<test_item_t>& kvs, double erase_ratio) {
return tm_setup().then([this, &kvs, erase_ratio] {
return seastar::async([this, &kvs, erase_ratio] {
auto tree = std::make_unique<TreeBuilder<TRACK, TestValue>>(kvs,
auto tree = std::make_unique<TreeBuilder<TRACK, BoundedValue>>(kvs,
(is_dummy ? NodeExtentManager::create_dummy(true)
: NodeExtentManager::create_seastore(*tm)));
{