crimson: seastore add OP_ZERO support

Signed-off-by: chunmei-liu <chunmei.liu@intel.com>
This commit is contained in:
chunmei-liu 2022-04-01 20:39:15 -07:00
parent bd82d21943
commit 31dcd54dff
4 changed files with 169 additions and 0 deletions

View File

@ -369,6 +369,90 @@ extent_to_write_list_t get_buffers(laddr_t offset, bufferlist &bl)
return ret;
};
/// Rewrites the mappings described by `_pins` so that the region starting at
/// `_offset` and ending before `_offset + _len` is backed by zero buffers.
///
/// Outline of what the body does:
///  1. split_pin_left() on the first pin at `offset`,
///  2. split_pin_right() on the last pin at `end`,
///  3. assemble the list of extents to write (`to_write`),
///  4. do_removals() on all of `pins`, then do_insertions() of `to_write`.
///
/// `_pins` must be non-empty and must cover [_offset, _offset + _len);
/// both conditions are enforced with ceph_assert below.
ObjectDataHandler::write_ret ObjectDataHandler::zerowrite(
context_t ctx,
laddr_t _offset,
extent_len_t _len,
lba_pin_list_t &&_pins)
{
// seastar::do_with owns the mutable state shared by every continuation in
// the chain: the start offset, the exclusive end, the pins, the output list
// and the two scratch buffers for preserved head/tail bytes.
return seastar::do_with(
_offset,
_offset + _len,
std::move(_pins),
extent_to_write_list_t(),
bufferlist(),
bufferlist(),
[ctx](laddr_t &offset, laddr_t &end, auto &pins, auto &to_write,
auto &head_bl, auto &end_bl) {
LOG_PREFIX(ObjectDataHandler::zerowrite);
// NOTE(review): the format string reads like offset~length, but the second
// value logged here is the exclusive end address, not a length.
DEBUGT("zerowrite: {}~{}",
ctx.t,
offset,
end);
ceph_assert(pins.size() >= 1);
// The first pin has to start at or before the zeroed range ...
auto pin_begin = pins.front()->get_key();
ceph_assert(pin_begin <= offset);
// ... and the last pin has to end at or after it.
auto pin_end = pins.back()->get_key() + pins.back()->get_length();
ceph_assert(pin_end >= end);
return split_pin_left(
ctx,
pins.front(),
offset
).si_then([ctx, pin_begin, &offset, &end, &pins, &to_write, &head_bl]
(auto p) {
// Both members of the result are optional-like: each is tested before use.
// Presumably left_extent is the untouched leading part of the first pin and
// headptr holds the bytes that precede `offset` and must be preserved --
// TODO confirm against split_pin_left.
auto &[left_extent, headptr] = p;
if (left_extent) {
ceph_assert(left_extent->addr == pin_begin);
to_write.push_front(std::move(*left_extent));
}
if (headptr) {
head_bl.append(*headptr);
// Pull the write start back so that it covers the preserved head bytes;
// from here on `offset + head_bl.length()` is the caller's original offset.
offset -= headptr->length();
assert_aligned(offset);
}
return split_pin_right(
ctx,
pins.back(),
end);
}).si_then([ctx, pin_end, &offset, &end, &pins, &to_write, &head_bl, &end_bl]
(auto p) {
// Mirror image of the left split: right_extent / tailptr are optional-like.
// Presumably tailptr holds the bytes following `end` that must be kept and
// right_extent is the untouched trailing part of the last pin -- TODO confirm.
auto &[right_extent, tailptr] = p;
if (tailptr) {
end_bl.append(*tailptr);
assert_aligned(end - pins.back()->get_key() + end_bl.length());
}
if (pins.front() == pins.back()) {
// Single pin: emit one buffer = preserved head + zeros + preserved tail.
// The zero run spans from the original offset up to `end`.
bufferptr newbpt = bufferptr(ceph::buffer::create(end -
(offset + head_bl.length()) , 0));
bufferlist newbl;
newbl.append(head_bl);
newbl.append(newbpt);
newbl.append(end_bl);
head_bl.swap(newbl);
to_write.splice(to_write.end(), get_buffers(offset, head_bl));
} else {
// Several pins: the preserved head bytes are queued on their own at
// `offset`, and a second buffer (zeros from the last pin's key up to `end`,
// followed by the preserved tail) is queued at the last pin's key.
// NOTE(review): nothing is queued here for the range between
// `offset + head_bl.length()` and `pins.back()->get_key()`, although every
// pin is removed below -- confirm this is intended.
to_write.splice(to_write.end(), get_buffers(offset, head_bl));
bufferptr newbpt = bufferptr(ceph::buffer::create(end -
pins.back()->get_key(), 0));
bufferlist newbl;
newbl.append(newbpt);
newbl.append(end_bl);
end_bl.swap(newbl);
to_write.splice(to_write.end(), get_buffers(pins.back()->get_key(), end_bl));
}
if (right_extent) {
ceph_assert((right_extent->addr + right_extent->len) == pin_end);
to_write.push_back(std::move(*right_extent));
}
return write_iertr::now();
}).si_then([ctx, &pins] {
// Drop every original mapping first ...
return do_removals(ctx, pins);
}).si_then([ctx, &to_write] {
// ... then install the replacement extents assembled above.
return do_insertions(ctx, to_write);
});
});
}
ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
context_t ctx,
laddr_t _offset,
@ -436,6 +520,39 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
});
}
/// Zeroes `len` bytes of the object referenced by `ctx`, starting at the
/// object-relative address `offset`.
///
/// Steps, all run with the object's data descriptor obtained through
/// with_object_data():
///  - prepare_data_reservation() is asked for `offset + len` rounded up to the
///    transaction manager's block size,
///  - the pins for the target range (reserved base + offset, `len` bytes) are
///    looked up,
///  - the range and its pins are handed to zerowrite().
ObjectDataHandler::zero_ret ObjectDataHandler::zero(
  context_t ctx,
  objaddr_t offset,
  extent_len_t len)
{
  return with_object_data(
    ctx,
    [this, ctx, offset, len](auto &object_data) {
      LOG_PREFIX(ObjectDataHandler::zero);
      DEBUGT("zero to {}~{}, object_data: {}~{}, is_null {}",
             ctx.t,
             offset,
             len,
             object_data.get_reserved_data_base(),
             object_data.get_reserved_data_len(),
             object_data.is_null());

      // Size the reservation has to reach, in whole blocks.
      const auto required_size =
        p2roundup(offset + len, ctx.tm.get_block_size());

      return prepare_data_reservation(
        ctx, object_data, required_size
      ).si_then([this, ctx, offset, len, &object_data] {
        // Translate the object-relative offset into the reserved region.
        const auto zero_start = object_data.get_reserved_data_base() + offset;
        auto pins_fut = ctx.tm.get_pins(ctx.t, zero_start, len);
        return std::move(pins_fut).si_then(
          [this, ctx, zero_start, len](auto pins) {
            return zerowrite(ctx, zero_start, len, std::move(pins));
          });
      });
    });
}
ObjectDataHandler::write_ret ObjectDataHandler::write(
context_t ctx,
objaddr_t offset,

View File

@ -66,6 +66,13 @@ public:
objaddr_t offset,
const bufferlist &bl);
/// Zeroes data in [offset, offset + len)
///
/// The implementation first makes sure the object's data reservation reaches
/// offset + len rounded up to the block size, then rewrites the affected
/// mappings.
using zero_iertr = base_iertr;
using zero_ret = zero_iertr::future<>;
zero_ret zero(
context_t ctx,
objaddr_t offset,
extent_len_t len);
/// Reads data in [offset, offset + len)
using read_iertr = base_iertr;
using read_ret = read_iertr::future<bufferlist>;
@ -103,6 +110,14 @@ private:
lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
);
/// Zeroes region [offset, offset + len); pins must be non-empty and cover
/// the whole region (asserted by the implementation)
write_ret zerowrite(
context_t ctx, ///< [in] ctx
laddr_t offset, ///< [in] zero offset
extent_len_t len, ///< [in] len to zero
lba_pin_list_t &&pins ///< [in] set of pins overlapping above region
);
/// Ensures object_data reserved region is prepared
write_ret prepare_data_reservation(
context_t ctx,

View File

@ -1167,6 +1167,12 @@ SeaStore::tm_ret SeaStore::_do_transaction_step(
i.decode_bl(hint);
return tm_iertr::now();
}
case Transaction::OP_ZERO:
{
objaddr_t off = op->off;
extent_len_t len = op->len;
return _zero(ctx, get_onode(op->oid), off, len);
}
default:
ERROR("bad op {}", static_cast<unsigned>(op->op));
return crimson::ct_error::input_output_error::make();
@ -1225,6 +1231,33 @@ SeaStore::tm_ret SeaStore::_write(
});
}
/// Handles Transaction::OP_ZERO for one object: zeroes `len` bytes starting at
/// `offset`.
///
/// - Fails with input_output_error when the end of the range reaches or
///   exceeds max_object_size.
/// - Grows the onode's recorded size to the end of the range if it is
///   currently smaller.
/// - Delegates the data update to ObjectDataHandler::zero().
///
/// @param ctx    transaction context (provides the transaction)
/// @param onode  onode of the object being modified
/// @param offset object-relative start of the range
/// @param len    number of bytes to zero
SeaStore::tm_ret SeaStore::_zero(
  internal_context_t &ctx,
  OnodeRef &onode,
  objaddr_t offset,
  extent_len_t len)
{
  LOG_PREFIX(SeaStore::_zero);
  DEBUGT("onode={} {}~{}", *ctx.transaction, *onode, offset, len);

  // Compute the end of the range once, in 64 bits. Evaluating `offset + len`
  // in the operands' own width can wrap around (presumably both are 32-bit --
  // confirm against seastore_types.h), which would let an out-of-range request
  // pass the bounds check and record a bogus object size.
  const uint64_t zero_end = static_cast<uint64_t>(offset) + len;

  // NOTE(review): `>=` also rejects a range ending exactly at max_object_size;
  // kept as-is, confirm whether `>` was intended.
  if (zero_end >= max_object_size) {
    return crimson::ct_error::input_output_error::make();
  }

  auto &object_size = onode->get_mutable_layout(*ctx.transaction).size;
  object_size = std::max<uint64_t>(zero_end, object_size);

  // do_with keeps the handler alive until the returned future resolves.
  // Captures are spelled out: `this` is needed for transaction_manager.
  return seastar::do_with(
    ObjectDataHandler(max_object_size),
    [this, offset, len, &ctx, &onode](auto &objhandler) {
      return objhandler.zero(
        ObjectDataHandler::context_t{
          *transaction_manager,
          *ctx.transaction,
          *onode,
        },
        offset,
        len);
    });
}
SeaStore::omap_set_kvs_ret
SeaStore::_omap_set_kvs(
OnodeRef &onode,

View File

@ -339,6 +339,10 @@ private:
uint64_t offset, size_t len,
ceph::bufferlist &&bl,
uint32_t fadvise_flags);
/// Zeroes len bytes of onode's data starting at offset, extending the
/// recorded object size to offset + len if it is smaller.
tm_ret _zero(
internal_context_t &ctx,
OnodeRef &onode,
objaddr_t offset, extent_len_t len);
tm_ret _omap_set_values(
internal_context_t &ctx,
OnodeRef &onode,