diff --git a/src/os/Transaction.cc b/src/os/Transaction.cc index 5a5ae0cb6a2..d3a04579dda 100644 --- a/src/os/Transaction.cc +++ b/src/os/Transaction.cc @@ -56,6 +56,16 @@ void Transaction::dump(ceph::Formatter *f) case Transaction::OP_NOP: f->dump_string("op_name", "nop"); break; + case Transaction::OP_CREATE: + { + coll_t cid = i.get_cid(op->cid); + ghobject_t oid = i.get_oid(op->oid); + f->dump_string("op_name", "create"); + f->dump_stream("collection") << cid; + f->dump_stream("oid") << oid; + } + break; + case Transaction::OP_TOUCH: { coll_t cid = i.get_cid(op->cid); diff --git a/src/os/Transaction.h b/src/os/Transaction.h index 5c8111779a7..20d554819c2 100644 --- a/src/os/Transaction.h +++ b/src/os/Transaction.h @@ -106,6 +106,7 @@ class Transaction { public: enum { OP_NOP = 0, + OP_CREATE = 7, // cid, oid OP_TOUCH = 9, // cid, oid OP_WRITE = 10, // cid, oid, offset, len, bl OP_ZERO = 11, // cid, oid, offset, len @@ -400,6 +401,7 @@ public: case OP_NOP: break; + case OP_CREATE: case OP_TOUCH: case OP_REMOVE: case OP_SETATTR: @@ -791,6 +793,19 @@ public: _op->op = OP_NOP; data.ops++; } + /** + * create + * + * create an object that does not yet exist + * (behavior is undefined if the object already exists) + */ + void create(const coll_t& cid, const ghobject_t& oid) { + Op* _op = _get_next_op(); + _op->op = OP_CREATE; + _op->cid = _get_coll_id(cid); + _op->oid = _get_object_id(oid); + data.ops++; + } /** * touch * diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 5db054135e1..f96caa7e6b0 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3504,7 +3504,8 @@ uint64_t BlueStore::Collection::make_blob_unshared(SharedBlob *sb) BlueStore::OnodeRef BlueStore::Collection::get_onode( const ghobject_t& oid, - bool create) + bool create, + bool is_createop) { ceph_assert(create ? lock.is_wlocked() : lock.is_locked()); @@ -3528,9 +3529,12 @@ BlueStore::OnodeRef BlueStore::Collection::get_onode( << pretty_binary_string(key) << dendl; bufferlist v; - int r = store->db->get(PREFIX_OBJ, key.c_str(), key.size(), &v); - ldout(store->cct, 20) << " r " << r << " v.len " << v.length() << dendl; + int r = -ENOENT; Onode *on; + if (!is_createop) { + r = store->db->get(PREFIX_OBJ, key.c_str(), key.size(), &v); + ldout(store->cct, 20) << " r " << r << " v.len " << v.length() << dendl; + } if (v.length() == 0) { ceph_assert(r == -ENOENT); if (!store->cct->_conf->bluestore_debug_misc && @@ -11186,6 +11190,7 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) // these operations implicity create the object bool create = false; if (op->op == Transaction::OP_TOUCH || + op->op == Transaction::OP_CREATE || op->op == Transaction::OP_WRITE || op->op == Transaction::OP_ZERO) { create = true; @@ -11196,7 +11201,7 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) OnodeRef &o = ovec[op->oid]; if (!o) { ghobject_t oid = i.get_oid(op->oid); - o = c->get_onode(oid, create); + o = c->get_onode(oid, create, op->op == Transaction::OP_CREATE); } if (!create && (!o || !o->exists)) { dout(10) << __func__ << " op " << op->op << " got ENOENT on " @@ -11206,6 +11211,7 @@ void BlueStore::_txc_add_transaction(TransContext *txc, Transaction *t) } switch (op->op) { + case Transaction::OP_CREATE: case Transaction::OP_TOUCH: r = _touch(txc, c, o); break; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 227716dfe3c..4f33010da27 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1396,7 +1396,7 @@ public: pool_opts_t pool_opts; ContextQueue *commit_queue; - OnodeRef get_onode(const ghobject_t& oid, bool create); + OnodeRef get_onode(const ghobject_t& oid, bool create, bool is_createop=false); // the terminology is confusing here, sorry! // diff --git a/src/os/filestore/FileStore.cc b/src/os/filestore/FileStore.cc index e72f0cc3095..ac7c6157075 100644 --- a/src/os/filestore/FileStore.cc +++ b/src/os/filestore/FileStore.cc @@ -2788,6 +2788,7 @@ void FileStore::_do_transaction( case Transaction::OP_NOP: break; case Transaction::OP_TOUCH: + case Transaction::OP_CREATE: { const coll_t &_cid = i.get_cid(op->cid); const ghobject_t &oid = i.get_oid(op->oid); diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc index 2cf82e5efd0..eab11add1a2 100644 --- a/src/os/kstore/KStore.cc +++ b/src/os/kstore/KStore.cc @@ -2315,6 +2315,7 @@ void KStore::_txc_add_transaction(TransContext *txc, Transaction *t) // these operations implicity create the object bool create = false; if (op->op == Transaction::OP_TOUCH || + op->op == Transaction::OP_CREATE || op->op == Transaction::OP_WRITE || op->op == Transaction::OP_ZERO) { create = true; @@ -2333,6 +2334,7 @@ void KStore::_txc_add_transaction(TransContext *txc, Transaction *t) switch (op->op) { case Transaction::OP_TOUCH: + case Transaction::OP_CREATE: r = _touch(txc, c, o); break; diff --git a/src/os/memstore/MemStore.cc b/src/os/memstore/MemStore.cc index dc1d5ff55ab..02bbf62395f 100644 --- a/src/os/memstore/MemStore.cc +++ b/src/os/memstore/MemStore.cc @@ -670,6 +670,7 @@ void MemStore::_do_transaction(Transaction& t) case Transaction::OP_NOP: break; case Transaction::OP_TOUCH: + case Transaction::OP_CREATE: { coll_t cid = i.get_cid(op->cid); ghobject_t oid = i.get_oid(op->oid); diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index a14de9558a5..cad3fc40cc2 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1980,7 +1980,8 @@ bool ECBackend::try_reads_to_commit() &trans, &(op->temp_added), &(op->temp_cleared), - get_parent()->get_dpp()); + get_parent()->get_dpp(), + get_osdmap()->require_osd_release); } dout(20) << __func__ << ": " << cache << dendl; diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index 38402004c5d..0489fd7b5a7 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -105,7 +105,8 @@ void ECTransaction::generate_transactions( map *transactions, set *temp_added, set *temp_removed, - DoutPrefixProvider *dpp) + DoutPrefixProvider *dpp, + const ceph_release_t require_osd_release) { ceph_assert(written_map); ceph_assert(transactions); @@ -264,9 +265,15 @@ void ECTransaction::generate_transactions( [&](const PGTransaction::ObjectOperation::Init::None &) {}, [&](const PGTransaction::ObjectOperation::Init::Create &op) { for (auto &&st: *transactions) { - st.second.touch( - coll_t(spg_t(pgid, st.first)), - ghobject_t(oid, ghobject_t::NO_GEN, st.first)); + if (require_osd_release >= ceph_release_t::nautilus) { + st.second.create( + coll_t(spg_t(pgid, st.first)), + ghobject_t(oid, ghobject_t::NO_GEN, st.first)); + } else { + st.second.touch( + coll_t(spg_t(pgid, st.first)), + ghobject_t(oid, ghobject_t::NO_GEN, st.first)); + } } }, [&](const PGTransaction::ObjectOperation::Init::Clone &op) { diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h index ae0faf5df4d..704be1f0826 100644 --- a/src/osd/ECTransaction.h +++ b/src/osd/ECTransaction.h @@ -193,7 +193,8 @@ namespace ECTransaction { map *transactions, set *temp_added, set *temp_removed, - DoutPrefixProvider *dpp); + DoutPrefixProvider *dpp, + const ceph_release_t require_osd_release = ceph_release_t::unknown); }; #endif diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 81e2651d988..31e0af78d4e 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -282,7 +282,8 @@ void generate_transaction( vector &log_entries, ObjectStore::Transaction *t, set *added, - set *removed) + set *removed, + const ceph_release_t require_osd_release = ceph_release_t::unknown ) { ceph_assert(t); ceph_assert(added); @@ -323,7 +324,11 @@ void generate_transaction( [&](const PGTransaction::ObjectOperation::Init::None &) { }, [&](const PGTransaction::ObjectOperation::Init::Create &op) { - t->touch(coll, goid); + if (require_osd_release >= ceph_release_t::nautilus) { + t->create(coll, goid); + } else { + t->touch(coll, goid); + } }, [&](const PGTransaction::ObjectOperation::Init::Clone &op) { t->clone( @@ -449,7 +454,8 @@ void ReplicatedBackend::submit_transaction( log_entries, &op_t, &added, - &removed); + &removed, + get_osdmap()->require_osd_release); ceph_assert(added.size() <= 1); ceph_assert(removed.size() <= 1);