os/bluestore: new write path

- simplified wal_op_t.  We still have overlays in there, although they
  might need to be removed soon too.
- init_csum cleanup
- totally new write path

Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-05-14 08:48:47 -04:00
parent ef99f9446a
commit 9cfb096300
4 changed files with 558 additions and 1253 deletions

File diff suppressed because it is too large Load Diff

View File

@ -938,6 +938,41 @@ private:
// --------------------------------------------------------
// write ops
/// State carried through one write operation: the write flags, the old
/// lextents whose blobs must be dereferenced, and the newly created blobs
/// together with the data destined for them.
/// NOTE(review): blob_new and bl_new look like parallel vectors (bl_new[i]
/// holding the data for blob_new[i]) -- confirm against the write path.
struct WriteContext {
unsigned fadvise_flags; ///< write flags
bool buffered; ///< buffered write
// Disabled member, kept for reference only.
//map<uint64_t,bluestore_lextent_t> lex_new; ///< new lextents
vector<bluestore_lextent_t> lex_old; ///< must deref blobs
vector<bluestore_blob_t*> blob_new; ///< new blobs
vector<bufferlist> bl_new; ///< new data, for above blobs
/// Starts with no flags set and buffering disabled; the vectors start empty.
WriteContext() : fadvise_flags(0), buffered(false) {}
};
/// Write-path helper operating on the range [offset, offset+length) of
/// onode @p o, reading its data through @p blp and recording state in @p wctx.
/// NOTE(review): presumably handles the "small" part of a write (a piece that
/// does not cover a whole allocation unit) -- the definition is not visible
/// here; confirm.
void _do_write_small(
TransContext *txc,
CollectionRef &c,
OnodeRef o,
uint64_t offset, uint64_t length,
bufferlist::iterator& blp,
WriteContext *wctx);
/// Write-path helper operating on the range [offset, offset+length) of
/// onode @p o, reading its data through @p blp and recording state in @p wctx.
/// NOTE(review): presumably the counterpart of _do_write_small for large,
/// aligned pieces of a write -- the definition is not visible here; confirm.
void _do_write_big(
TransContext *txc,
CollectionRef &c,
OnodeRef o,
uint64_t offset, uint64_t length,
bufferlist::iterator& blp,
WriteContext *wctx);
/// Write-path step that takes only the transaction context and the
/// accumulated WriteContext.
/// NOTE(review): presumably allocates space for wctx->blob_new and issues the
/// data in wctx->bl_new; the int return is presumably 0 on success or a
/// negative errno -- confirm against the definition.
int _do_alloc_write(
TransContext *txc,
WriteContext *wctx);
/// Final write-path step for a WriteContext, run against onode @p o in
/// collection @p c.
/// NOTE(review): presumably dereferences the blobs recorded in
/// wctx->lex_old (per that member's "must deref blobs" note) -- confirm
/// against the definition.
void _wctx_finish(
TransContext *txc,
CollectionRef& c,
OnodeRef o,
WriteContext *wctx);
int _do_transaction(Transaction *t,
TransContext *txc,
ThreadPool::TPHandle *handle);
@ -971,18 +1006,6 @@ private:
OnodeRef o, bufferlist *bl,
uint64_t offset, uint64_t *length,
uint64_t chunk_size);
int _do_allocate(TransContext *txc,
CollectionRef& c,
OnodeRef o,
uint64_t offset, uint64_t length,
uint32_t fadvise_flags,
bool allow_overlay,
uint64_t *alloc_offset,
uint64_t *alloc_length,
uint64_t *cow_head_extent,
uint64_t *cow_tail_extent,
uint64_t *rmw_cow_head,
uint64_t *rmw_cow_tail);
int _do_write(TransContext *txc,
CollectionRef &c,
OnodeRef o,
@ -992,15 +1015,6 @@ private:
int _touch(TransContext *txc,
CollectionRef& c,
OnodeRef& o);
int _do_write_zero(TransContext *txc,
CollectionRef &c,
OnodeRef o,
uint64_t offset, uint64_t length);
void _do_zero_tail_extent(
TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset);
int _do_zero(TransContext *txc,
CollectionRef& c,
OnodeRef& o,

View File

@ -748,15 +748,10 @@ void bluestore_wal_op_t::encode(bufferlist& bl) const
{
ENCODE_START(1, 1, bl);
::encode(op, bl);
::encode(extent, bl);
::encode(src_extent, bl);
::encode(src_rmw_head, bl);
::encode(src_rmw_tail, bl);
::encode(extents, bl);
::encode(data, bl);
::encode(nid, bl);
::encode(overlays, bl);
if (!overlays.size()) {
::encode(data, bl);
}
::encode(removed_overlays, bl);
ENCODE_FINISH(bl);
}
@ -765,15 +760,10 @@ void bluestore_wal_op_t::decode(bufferlist::iterator& p)
{
DECODE_START(1, p);
::decode(op, p);
::decode(extent, p);
::decode(src_extent, p);
::decode(src_rmw_head, p);
::decode(src_rmw_tail, p);
::decode(extents, p);
::decode(data, p);
::decode(nid, p);
::decode(overlays, p);
if (!overlays.size()) {
::decode(data, p);
}
::decode(removed_overlays, p);
DECODE_FINISH(p);
}
@ -781,21 +771,21 @@ void bluestore_wal_op_t::decode(bufferlist::iterator& p)
void bluestore_wal_op_t::dump(Formatter *f) const
{
f->dump_unsigned("op", (int)op);
f->dump_object("extent", extent);
f->dump_object("src_extent", src_extent);
f->dump_unsigned("src_rmw_head", src_rmw_head);
f->dump_unsigned("src_rmw_tail", src_rmw_tail);
f->dump_unsigned("data_len", data.length());
f->open_array_section("extents");
for (auto& e : extents) {
f->dump_object("extent", e);
}
f->close_section();
f->dump_unsigned("nid", nid);
f->open_array_section("overlays");
for (vector<bluestore_overlay_t>::const_iterator p = overlays.begin();
p != overlays.end(); ++p) {
f->dump_object("overlay", *p);
for (auto& o : overlays) {
f->dump_object("overlay", o);
}
f->close_section();
f->open_array_section("removed_overlays");
for (vector<uint64_t>::const_iterator p = removed_overlays.begin();
p != removed_overlays.end(); ++p) {
f->dump_unsigned("key", *p);
for (auto key : removed_overlays) {
f->dump_unsigned("key", key);
}
f->close_section();
}
@ -805,19 +795,9 @@ void bluestore_wal_op_t::generate_test_instances(list<bluestore_wal_op_t*>& o)
o.push_back(new bluestore_wal_op_t);
o.push_back(new bluestore_wal_op_t);
o.back()->op = OP_WRITE;
o.back()->extent.offset = 1;
o.back()->extent.length = 2;
o.back()->src_extent.offset = 10000;
o.back()->src_extent.length = 2;
o.back()->src_rmw_head = 22;
o.back()->src_rmw_tail = 88;
o.back()->extents.push_back(bluestore_pextent_t(1, 2));
o.back()->extents.push_back(bluestore_pextent_t(100, 5));
o.back()->data.append("my data");
o.back()->nid = 3;
o.back()->overlays.push_back(bluestore_overlay_t());
o.back()->overlays.push_back(bluestore_overlay_t());
o.back()->overlays.back().key = 4;
o.back()->overlays.back().value_offset = 5;
o.back()->overlays.back().length = 6;
}
void bluestore_wal_transaction_t::encode(bufferlist& bl) const
@ -865,8 +845,6 @@ void bluestore_wal_transaction_t::generate_test_instances(list<bluestore_wal_tra
o.back()->ops.push_back(bluestore_wal_op_t());
o.back()->ops.push_back(bluestore_wal_op_t());
o.back()->ops.back().op = bluestore_wal_op_t::OP_WRITE;
o.back()->ops.back().extent.offset = 2;
o.back()->ops.back().extent.length = 3;
o.back()->ops.back().extents.push_back(bluestore_pextent_t(1,7));
o.back()->ops.back().data.append("foodata");
o.back()->ops.back().nid = 4;
}

View File

@ -415,11 +415,10 @@ struct bluestore_blob_t {
return &csum_data[cs * i];
}
void init_csum(unsigned type, unsigned order) {
void init_csum(unsigned type, unsigned order, unsigned len) {
csum_type = type;
csum_block_order = order;
csum_data.resize(get_csum_value_size() * get_ondisk_length() /
get_csum_block_size());
csum_data.resize(get_csum_value_size() * len / get_csum_block_size());
}
};
WRITE_CLASS_ENCODER(bluestore_blob_t)
@ -606,20 +605,16 @@ WRITE_CLASS_ENCODER(bluestore_onode_t)
struct bluestore_wal_op_t {
typedef enum {
OP_WRITE = 1,
OP_COPY = 2,
OP_ZERO = 4,
} type_t;
__u8 op = 0;
bluestore_pextent_t extent;
bluestore_pextent_t src_extent;
uint64_t src_rmw_head, src_rmw_tail;
vector<bluestore_pextent_t> extents;
bufferlist data;
uint64_t nid;
vector<bluestore_overlay_t> overlays;
vector<uint64_t> removed_overlays;
bluestore_wal_op_t() : src_rmw_head(0), src_rmw_tail(0), nid(0) {}
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& p);
void dump(Formatter *f) const;
@ -638,21 +633,6 @@ struct bluestore_wal_transaction_t {
bluestore_wal_transaction_t() : seq(0), _bytes(-1) {}
#if 0
no users for this
uint64_t get_bytes() {
if (_bytes < 0) {
_bytes = 0;
for (list<bluestore_wal_op_t>::iterator p = ops.begin();
p != ops.end();
++p) {
_bytes += p->extent.length;
}
}
return _bytes;
}
#endif
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& p);
void dump(Formatter *f) const;