From 31dcd54dfff081a1a0e45129d0a69331cd059b1e Mon Sep 17 00:00:00 2001
From: chunmei-liu
Date: Fri, 1 Apr 2022 20:39:15 -0700
Subject: [PATCH] crimson: seastore add OP_ZERO support

Signed-off-by: chunmei-liu
---
 .../os/seastore/object_data_handler.cc        | 116 ++++++++++++++++++
 src/crimson/os/seastore/object_data_handler.h |  16 +++
 src/crimson/os/seastore/seastore.cc           |  37 ++++++
 src/crimson/os/seastore/seastore.h            |   5 +
 4 files changed, 174 insertions(+)

diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc
index e59ad3dee7e..fb50b633e87 100644
--- a/src/crimson/os/seastore/object_data_handler.cc
+++ b/src/crimson/os/seastore/object_data_handler.cc
@@ -369,6 +369,86 @@ extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
   return ret;
 };
 
+ObjectDataHandler::write_ret ObjectDataHandler::zerowrite(
+  context_t ctx,
+  laddr_t _offset,
+  extent_len_t _len,
+  lba_pin_list_t &&_pins)
+{
+  return seastar::do_with(
+    _offset,
+    _offset + _len,
+    std::move(_pins),
+    extent_to_write_list_t(),
+    bufferlist(),
+    bufferlist(),
+    [ctx](laddr_t &offset, laddr_t &end, auto &pins, auto &to_write,
+          auto &head_bl, auto &end_bl) {
+      LOG_PREFIX(ObjectDataHandler::zerowrite);
+      // Logged as offset~length, the same convention zero() uses.
+      DEBUGT("zerowrite: {}~{}",
+             ctx.t,
+             offset,
+             end - offset);
+      ceph_assert(pins.size() >= 1);
+      auto pin_begin = pins.front()->get_key();
+      ceph_assert(pin_begin <= offset);
+      auto pin_end = pins.back()->get_key() + pins.back()->get_length();
+      ceph_assert(pin_end >= end);
+      return split_pin_left(
+        ctx,
+        pins.front(),
+        offset
+      ).si_then([ctx, pin_begin, &offset, &end, &pins, &to_write, &head_bl]
+                (auto p) {
+        auto &[left_extent, headptr] = p;
+        if (left_extent) {
+          ceph_assert(left_extent->addr == pin_begin);
+          to_write.push_front(std::move(*left_extent));
+        }
+        if (headptr) {
+          // headptr (presumably the bytes preceding offset) is kept in
+          // front of the zeros, so the write starts that much earlier.
+          head_bl.append(*headptr);
+          offset -= headptr->length();
+          assert_aligned(offset);
+        }
+        return split_pin_right(
+          ctx,
+          pins.back(),
+          end);
+      }).si_then([ctx, pin_end, &offset, &end, &pins, &to_write, &head_bl, &end_bl]
+                 (auto p) {
+        auto &[right_extent, tailptr] = p;
+        if (tailptr) {
+          end_bl.append(*tailptr);
+          assert_aligned(end - pins.back()->get_key() + end_bl.length());
+        }
+        // Emit a single buffer for the whole span, however many pins it
+        // crosses: kept head bytes, zeros for the requested range, kept
+        // tail bytes.  Every pin is handed to do_removals() below, so
+        // nothing between the head and the tail may be left out.
+        bufferptr zeros = bufferptr(ceph::buffer::create(
+          end - (offset + head_bl.length()), 0));
+        bufferlist newbl;
+        newbl.append(head_bl);
+        newbl.append(zeros);
+        newbl.append(end_bl);
+        head_bl.swap(newbl);
+        to_write.splice(to_write.end(), get_buffers(offset, head_bl));
+        if (right_extent) {
+          ceph_assert((right_extent->addr + right_extent->len) == pin_end);
+          to_write.push_back(std::move(*right_extent));
+        }
+        return write_iertr::now();
+      }).si_then([ctx, &pins] {
+        return do_removals(ctx, pins);
+      }).si_then([ctx, &to_write] {
+        return do_insertions(ctx, to_write);
+      });
+    });
+}
+
 ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
   context_t ctx,
   laddr_t _offset,
@@ -436,6 +516,42 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
     });
 }
 
+ObjectDataHandler::zero_ret ObjectDataHandler::zero(
+  context_t ctx,
+  objaddr_t offset,
+  extent_len_t len)
+{
+  return with_object_data(
+    ctx,
+    [this, ctx, offset, len](auto &object_data) {
+      LOG_PREFIX(ObjectDataHandler::zero);
+      DEBUGT("zero to {}~{}, object_data: {}~{}, is_null {}",
+             ctx.t,
+             offset,
+             len,
+             object_data.get_reserved_data_base(),
+             object_data.get_reserved_data_len(),
+             object_data.is_null());
+      // Ask for a reservation that reaches the block-rounded end of the
+      // range before looking up the pins that cover it.
+      return prepare_data_reservation(
+        ctx,
+        object_data,
+        p2roundup(offset + len, ctx.tm.get_block_size())
+      ).si_then([this, ctx, offset, len, &object_data] {
+        // Object-relative offset -> address inside the reserved region.
+        auto logical_offset = object_data.get_reserved_data_base() + offset;
+        return ctx.tm.get_pins(
+          ctx.t,
+          logical_offset,
+          len
+        ).si_then([this, ctx, logical_offset, len](auto pins) {
+          return zerowrite(ctx, logical_offset, len, std::move(pins));
+        });
+      });
+    });
+}
+
 ObjectDataHandler::write_ret ObjectDataHandler::write(
   context_t ctx,
   objaddr_t offset,
diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h
index dd91f343623..031ddd510ea 100644
--- a/src/crimson/os/seastore/object_data_handler.h
+++ b/src/crimson/os/seastore/object_data_handler.h
@@ -66,6 +66,14 @@ public:
     objaddr_t offset,
     const bufferlist &bl);
 
+  /// Zeroes data in [offset, offset + len)
+  using zero_iertr = base_iertr;
+  using zero_ret = zero_iertr::future<>;
+  zero_ret zero(
+    context_t ctx,
+    objaddr_t offset,
+    extent_len_t len);
+
   /// Reads data in [offset, offset + len)
   using read_iertr = base_iertr;
   using read_ret = read_iertr::future<bufferlist>;
@@ -103,6 +111,14 @@ private:
     lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
   );
 
+  /// Zeroes region [offset, offset + len)
+  write_ret zerowrite(
+    context_t ctx,         ///< [in] ctx
+    laddr_t offset,        ///< [in] zero offset
+    extent_len_t len,      ///< [in] len to zero
+    lba_pin_list_t &&pins  ///< [in] set of pins overlapping above region
+  );
+
   /// Ensures object_data reserved region is prepared
   write_ret prepare_data_reservation(
     context_t ctx,
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 424c0ea8ee9..ad05bf56cb6 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -1167,6 +1167,12 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
         i.decode_bl(hint);
         return tm_iertr::now();
       }
+      case Transaction::OP_ZERO:
+      {
+        objaddr_t off = op->off;
+        extent_len_t len = op->len;
+        return _zero(ctx, get_onode(op->oid), off, len);
+      }
       default:
         ERROR("bad op {}", static_cast<unsigned>(op->op));
         return crimson::ct_error::input_output_error::make();
@@ -1225,6 +1231,37 @@ SeaStore::tm_ret SeaStore::_write(
     });
 }
 
+SeaStore::tm_ret SeaStore::_zero(
+  internal_context_t &ctx,
+  OnodeRef &onode,
+  objaddr_t offset,
+  extent_len_t len)
+{
+  LOG_PREFIX(SeaStore::_zero);
+  DEBUGT("onode={} {}~{}", *ctx.transaction, *onode, offset, len);
+  // Widen before adding so the bound check cannot be defeated by the
+  // sum wrapping around in a narrower type.
+  const uint64_t end = static_cast<uint64_t>(offset) + len;
+  if (end >= max_object_size) {
+    return crimson::ct_error::input_output_error::make();
+  }
+  // Zeroing past the current end extends the object.
+  auto &object_size = onode->get_mutable_layout(*ctx.transaction).size;
+  object_size = std::max<uint64_t>(end, object_size);
+  return seastar::do_with(
+    ObjectDataHandler(max_object_size),
+    [=, &ctx, &onode](auto &objhandler) {
+      return objhandler.zero(
+        ObjectDataHandler::context_t{
+          *transaction_manager,
+          *ctx.transaction,
+          *onode,
+        },
+        offset,
+        len);
+    });
+}
+
 SeaStore::omap_set_kvs_ret
 SeaStore::_omap_set_kvs(
   OnodeRef &onode,
diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h
index d75b07a06de..f618a746a06 100644
--- a/src/crimson/os/seastore/seastore.h
+++ b/src/crimson/os/seastore/seastore.h
@@ -339,6 +339,11 @@ private:
       uint64_t offset, size_t len,
       ceph::bufferlist &&bl,
       uint32_t fadvise_flags);
+    /// Zeroes [offset, offset + len) in onode, extending its size if needed
+    tm_ret _zero(
+      internal_context_t &ctx,
+      OnodeRef &onode,
+      objaddr_t offset, extent_len_t len);
     tm_ret _omap_set_values(
       internal_context_t &ctx,
       OnodeRef &onode,