diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 9c6ff428a5c..ea0fb10b5dd 100644
--- a/src/crimson/os/seastore/cache.cc
+++ b/src/crimson/os/seastore/cache.cc
@@ -209,8 +209,9 @@ std::optional Cache::try_construct_record(Transaction &t)
 {
   // First, validate read set
   for (auto &i: t.read_set) {
-    if (i->state == CachedExtent::extent_state_t::INVALID)
+    if (i->state == CachedExtent::extent_state_t::INVALID) {
       return std::nullopt;
+    }
   }
 
   record_t record;
diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h
index 21c0fba8aa5..955d3a9d802 100644
--- a/src/crimson/os/seastore/onode.h
+++ b/src/crimson/os/seastore/onode.h
@@ -14,10 +14,29 @@
 namespace crimson::os::seastore {
 
 struct onode_layout_t {
+  // around 350 bytes for fixed fields in object_info_t,
+  // the rest is for the variable-sized fields like oid
+  // FIXME: object_info_t may need to be shrunk, at least
+  // oid doesn't need to be held in it.
+  static constexpr int MAX_OI_LENGTH = 1024;
+  // We might want to move the ss field out of onode_layout_t.
+  // The reason is that ss_attr may grow relatively large, as
+  // its clone_overlap may grow to a large size, if applications
+  // set objects to a relatively large size (for the purpose of reducing
+  // the number of objects per OSD, so that all objects' metadata
+  // can be cached in memory) and do many modifications between
+  // snapshots.
+  static constexpr int MAX_SS_LENGTH = 128;
+
   ceph_le32 size{0};
+  ceph_le32 oi_size{0};
+  ceph_le32 ss_size{0};
   omap_root_le_t omap_root;
   object_data_le_t object_data;
+
+  char oi[MAX_OI_LENGTH];
+  char ss[MAX_SS_LENGTH];
 } __attribute__((packed));
 
 class Transaction;
 
diff --git a/src/crimson/os/seastore/onode_manager.h b/src/crimson/os/seastore/onode_manager.h
index c8b95a4bd42..5031afd33dc 100644
--- a/src/crimson/os/seastore/onode_manager.h
+++ b/src/crimson/os/seastore/onode_manager.h
@@ -37,7 +37,8 @@ public:
     return seastar::make_ready_future();
   }
 
-  using get_or_create_onode_ertr = base_ertr;
+  using get_or_create_onode_ertr = base_ertr::extend<
+    crimson::ct_error::value_too_large>;
   using get_or_create_onode_ret = get_or_create_onode_ertr::future<
     OnodeRef>;
   virtual get_or_create_onode_ret get_or_create_onode(
@@ -46,7 +47,8 @@ public:
     return seastar::make_ready_future();
   }
 
-  using get_or_create_onodes_ertr = base_ertr;
+  using get_or_create_onodes_ertr = base_ertr::extend<
+    crimson::ct_error::value_too_large>;
   using get_or_create_onodes_ret = get_or_create_onodes_ertr::future<
     std::vector>;
   virtual get_or_create_onodes_ret get_or_create_onodes(
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc
index 6fd7fee5fe8..b75f85e02eb 100644
--- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc
+++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h"
 
 namespace {
 [[maybe_unused]] seastar::logger& logger() {
@@ -41,6 +42,10 @@ FLTreeOnodeManager::get_or_create_onode_ret
 FLTreeOnodeManager::get_or_create_onode(
   Transaction &trans,
   const ghobject_t &hoid) {
+  if (hoid.hobj.oid.name.length() + hoid.hobj.nspace.length()
+      > key_view_t::MAX_NS_OID_LENGTH) {
+    return crimson::ct_error::value_too_large::make();
+  }
   return tree.insert(
     trans, hoid,
     OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h
index ff3cc305e49..d5513cdce68 100644
--- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h
+++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h
@@ -9,6 +9,7 @@
 #include
 
 #include "common/hobject.h"
+#include "crimson/os/seastore/onode.h"
 #include "crimson/os/seastore/onode_manager/staged-fltree/fwd.h"
 
 namespace crimson::os::seastore::onode {
@@ -555,6 +556,13 @@ inline std::ostream& operator<<(std::ostream& os, const key_hobj_t& key) {
  */
 class key_view_t {
  public:
+  //FIXME: the length of ns and oid should be defined by osd_max_object_name_len
+  //       and osd_max_object_namespace_len in the future
+  static constexpr int MAX_NS_OID_LENGTH =
+    (4096 - sizeof(onode_layout_t) * 2) / 4
+    - sizeof(shard_pool_t) - sizeof(crush_t) - sizeof(snap_gen_t)
+    - 8; // size of length field of oid and ns
+
   /**
    * common interfaces as a full_key_t
    */
diff --git a/src/crimson/os/seastore/root_block.h b/src/crimson/os/seastore/root_block.h
index 57d64ad7ca1..fe7bdf99316 100644
--- a/src/crimson/os/seastore/root_block.h
+++ b/src/crimson/os/seastore/root_block.h
@@ -38,6 +38,8 @@ struct RootBlock : CachedExtent {
 
   root_t root;
 
+  std::map meta;
+
   RootBlock() : CachedExtent(0) {}
 
   RootBlock(const RootBlock &rhs) = default;
@@ -66,6 +68,11 @@ struct RootBlock : CachedExtent {
     ceph::bufferlist bl = _bl;
     bl.rebuild();
     root = *reinterpret_cast(bl.front().c_str());
+    if (root.have_meta) {
+      ceph::bufferlist meta_bl;
+      meta_bl.rebuild(ceph::buffer::ptr_node::create(&root.meta[0], root_t::MAX_META_LENGTH));
+      decode(meta, meta_bl);
+    }
     root.adjust_addrs_from_base(base);
   }
 
@@ -83,6 +90,7 @@ struct RootBlock : CachedExtent {
   }
 
   root_t &get_root() { return root; }
+
 };
 using RootBlockRef = RootBlock::Ref;
 
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 899019e0bd3..8a477b591d8 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -212,7 +212,17 @@ SeaStore::get_attrs_ertr::future SeaStore::get_attrs(
   auto c = static_cast(ch.get());
   logger().debug("{} {} {}",
                 __func__, c->get_cid(), oid);
-  return crimson::ct_error::enoent::make();
+  return repeat_with_onode(
+    c, oid, [=](auto &t, auto& onode) {
+      auto& layout = onode.get_layout();
+      SeaStore::attrs_t attrs;
+      attrs[OI_ATTR] = ceph::bufferptr(&layout.oi[0], layout.oi_size);
+      attrs[SS_ATTR] = ceph::bufferptr(&layout.ss[0], layout.ss_size);
+      return attrs;
+    }).handle_error(crimson::ct_error::input_output_error::handle([this] {
+      logger().error("EIO when getting attrs");
+      abort();
+    }), crimson::ct_error::pass_further_all{});
 }
 
 seastar::future SeaStore::stat(
@@ -803,6 +813,31 @@ SeaStore::tm_ret SeaStore::_setattrs(
 {
   logger().debug("{} onode={}",
                 __func__, *onode);
+  auto& layout = onode->get_mutable_layout(*ctx.transaction);
+  for (auto& [key, val] : aset) {
+    if (key == OI_ATTR) {
+      if (__builtin_expect(
+          val.length() > onode_layout_t::MAX_OI_LENGTH,
+          false)) {
+        logger().error("{} onode={} oi attr too long!", __func__, *onode);
+        return crimson::ct_error::input_output_error::make();
+      }
+      layout.oi_size = val.length();
+      val.copy_out(0, val.length(), &layout.oi[0]);
+    } else if (key == SS_ATTR) {
+      if (__builtin_expect(
+          val.length() > onode_layout_t::MAX_SS_LENGTH,
+          false)) {
+        logger().error("{} onode={} ss attr too long!", __func__, *onode);
+        return crimson::ct_error::input_output_error::make();
+      }
+      layout.ss_size = val.length();
+      val.copy_out(0, val.length(), &layout.ss[0]);
+    } else {
+      //FIXME: right now, only OI_ATTR and SS_ATTR are supported
+      assert(0 == "only OI_ATTR and SS_ATTR are supported for now");
+    }
+  }
   return tm_ertr::now();
 }
 
@@ -876,13 +911,41 @@ boost::intrusive_ptr SeaStore::_get_collection(const coll_t&
 seastar::future<> SeaStore::write_meta(const std::string& key,
                                        const std::string& value)
 {
-  return seastar::make_ready_future<>();
+  logger().debug("{}, key: {}; value: {}", __func__, key, value);
+  return seastar::do_with(key, value,
+    [this](auto& key, auto& value) {
+    return repeat_eagain([this, &key, &value] {
+      auto t = transaction_manager->create_transaction();
+      return transaction_manager->get_root(*t).safe_then(
+        [this, t=std::move(t), &key, &value](auto root) mutable {
+        transaction_manager->update_root_meta(*t, key, value);
+        return transaction_manager->submit_transaction(std::move(t));
+      });
+    });
+  }).handle_error(
+    crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"}
+  );
 }
 
 seastar::future> SeaStore::read_meta(const std::string& key)
 {
-  return seastar::make_ready_future>(
-    std::make_tuple(0, ""s));
+  logger().debug("{}, key: {}", __func__, key);
+  return seastar::do_with(transaction_manager->create_transaction(), key,
+    [this](auto& t, auto& key) {
+    return transaction_manager->get_root(*t).safe_then(
+      [this, &key](auto root) {
+      auto& meta = root->meta;
+      auto it = meta.find(key);
+      if (it != meta.end()) {
+        return seastar::make_ready_future>(
+          std::make_tuple(0, it->second));
+      }
+      return seastar::make_ready_future>(
+        std::make_tuple(-1, std::string("")));
+    });
+  }).handle_error(
+    crimson::ct_error::assert_all{"Invalid error in SeaStore::read_meta"}
+  );
 }
 
 uuid_d SeaStore::get_fsid() const
diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index 59b329c0fcc..9b0e43fb801 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h
@@ -655,9 +655,14 @@ public:
  * TODO: generalize this to permit more than one lba_manager implementation
  */
 struct __attribute__((packed)) root_t {
+
+  static constexpr int MAX_META_LENGTH = 1024;
+
   lba_root_t lba_root;
   laddr_le_t onode_root;
   coll_root_le_t collection_root;
+  char meta[MAX_META_LENGTH];
+  bool have_meta = false;
 
   void adjust_addrs_from_base(paddr_t base) {
     lba_root.adjust_addrs_from_base(base);
diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h
index d17907a3fa9..7a46e5c2993 100644
--- a/src/crimson/os/seastore/transaction.h
+++ b/src/crimson/os/seastore/transaction.h
@@ -15,6 +15,7 @@
 namespace crimson::os::seastore {
 
 struct retired_extent_gate_t;
+class SeaStore;
 
 /**
  * Transaction
@@ -161,6 +162,8 @@ public:
       write_set.erase(*i++);
     }
   }
+
+  friend class crimson::os::seastore::SeaStore;
 };
 using TransactionRef = Transaction::Ref;
 
diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h
index a49af8c9e5c..68736657f09 100644
--- a/src/crimson/os/seastore/transaction_manager.h
+++ b/src/crimson/os/seastore/transaction_manager.h
@@ -374,6 +374,53 @@ public:
     return segment_manager.release(id);
   }
 
+  /**
+   * get_root
+   *
+   * Get root block's ondisk layout
+   */
+  using get_root_ertr = base_ertr;
+  using get_root_ret = get_root_ertr::future;
+  get_root_ret get_root(Transaction &t) {
+    return cache->get_root(t);
+  }
+
+  /**
+   * update_root_meta
+   *
+   * modify root block's meta field
+   */
+  using update_root_meta_ertr = base_ertr::extend<
+    crimson::ct_error::value_too_large>;
+  using update_root_meta_ret = update_root_meta_ertr::future<>;
+  update_root_meta_ret update_root_meta(Transaction& t,
+                                        const std::string& key,
+                                        const std::string& value) {
+    auto root = cache->get_root_fast(t);
+    root = cache->duplicate_for_write(t, root)->cast();
+    root->meta[key] = value;
+
+    // calculate meta size
+    // TODO: we probably need a uniform interface for calculating
+    //       the encoded size of data structures
+    uint32_t meta_size = 4; // initial 4 bytes for std::map size
+    for (auto& [key, val] : root->meta) {
+      // sizes of length fields for key and val + sizes of key and val
+      meta_size += 8 + key.length() + val.length();
+    }
+
+    if (meta_size > root_t::MAX_META_LENGTH) {
+      return crimson::ct_error::value_too_large::make();
+    }
+
+    ceph::bufferlist bl(1);
+    bl.rebuild(ceph::buffer::ptr_node::create(
+      &root->get_root().meta[0], root_t::MAX_META_LENGTH));
+    encode(root->meta, bl);
+    root->get_root().have_meta = true;
+    return update_root_meta_ertr::now();
+  }
+
   /**
    * read_onode_root
    *
diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc
index 33837bdbb9e..eb20b965d61 100644
--- a/src/test/crimson/seastore/test_seastore.cc
+++ b/src/test/crimson/seastore/test_seastore.cc
@@ -53,6 +53,17 @@ struct seastore_test_t :
       std::move(t)).get0();
   }
 
+  void set_meta(
+    const std::string& key,
+    const std::string& value) {
+    return seastore->write_meta(key, value).get0();
+  }
+
+  std::tuple get_meta(
+    const std::string& key) {
+    return seastore->read_meta(key).get();
+  }
+
   struct object_state_t {
     const coll_t cid;
     const CollectionRef coll;
@@ -166,6 +177,21 @@ struct seastore_test_t :
       EXPECT_EQ(contents.length(), st.st_size);
     }
 
+    void set_attr_oi(
+      SeaStore &seastore,
+      bufferlist& val) {
+      CTransaction t;
+      t.setattr(cid, oid, OI_ATTR, val);
+      seastore.do_transaction(
+        coll,
+        std::move(t)).get0();
+    }
+
+    SeaStore::attrs_t get_attr_oi(
+      SeaStore &seastore) {
+      return seastore.get_attrs(
+        coll, oid).handle_error(SeaStore::get_attrs_ertr::discard_all{}).get();
+    }
 
     void check_omap_key(
       SeaStore &seastore,
@@ -268,6 +294,20 @@ TEST_F(seastore_test_t, collection_create_list_remove)
   });
 }
 
+TEST_F(seastore_test_t, meta) {
+  run_async([this] {
+    set_meta("key1", "value1");
+    set_meta("key2", "value2");
+
+    const auto [ret1, value1] = get_meta("key1");
+    const auto [ret2, value2] = get_meta("key2");
+    EXPECT_EQ(ret1, 0);
+    EXPECT_EQ(ret2, 0);
+    EXPECT_EQ(value1, "value1");
+    EXPECT_EQ(value2, "value2");
+  });
+}
+
 TEST_F(seastore_test_t, touch_stat)
 {
   run_async([this] {
@@ -306,6 +346,23 @@ TEST_F(seastore_test_t, omap_test_simple)
   });
 }
 
+TEST_F(seastore_test_t, attr)
+{
+  run_async([this] {
+    auto& test_obj = get_object(make_oid(0));
+    std::string s("asdfasdfasdf");
+    bufferlist bl;
+    encode(s, bl);
+    test_obj.set_attr_oi(*seastore, bl);
+    auto attrs = test_obj.get_attr_oi(*seastore);
+    std::string s2;
+    bufferlist bl2;
+    bl2.push_back(attrs[OI_ATTR]);
+    decode(s2, bl2);
+    EXPECT_EQ(s, s2);
+  });
+}
+
 TEST_F(seastore_test_t, omap_test_iterator)
 {
   run_async([this] {