mirror of
https://github.com/ceph/ceph
synced 2025-01-03 09:32:43 +00:00
osd: combine all internal object state into single '_' object_info_t xattr
This commit is contained in:
parent
c9554fc5d8
commit
478e51aa02
@ -508,4 +508,14 @@ inline void decode_nohead(int len, bufferlist& s, bufferlist::iterator& p)
|
||||
p.copy(len, s);
|
||||
}
|
||||
|
||||
|
||||
// full bl decoder
|
||||
template<class T>
|
||||
inline void decode(T o, bufferlist& bl)
|
||||
{
|
||||
bufferlist::iterator p = bl.begin();
|
||||
decode(o, p);
|
||||
assert(p.end());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -2492,7 +2492,7 @@ void OSD::split_pg(PG *parent, map<pg_t,PG*>& children, ObjectStore::Transaction
|
||||
PG *child = children[pgid];
|
||||
assert(child);
|
||||
bufferlist bv;
|
||||
store->getattr(parentid.to_coll(), poid, "oi", bv);
|
||||
store->getattr(parentid.to_coll(), poid, OI_ATTR, bv);
|
||||
object_info_t oi(bv);
|
||||
|
||||
if (oi.version > child->info.last_update) {
|
||||
|
@ -541,14 +541,14 @@ bool PG::build_backlog_map(map<eversion_t,Log::Entry>& omap)
|
||||
Log::Entry e;
|
||||
e.oid = it->oid;
|
||||
bufferlist bv;
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, OI_ATTR, bv);
|
||||
object_info_t oi(bv);
|
||||
e.version = oi.version;
|
||||
e.prior_version = oi.prior_version;
|
||||
e.reqid = oi.last_reqid;
|
||||
if (poid.oid.snap && poid.oid.snap < CEPH_NOSNAP) {
|
||||
e.op = Log::Entry::CLONE;
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, "snaps", e.snaps);
|
||||
::encode(oi.snaps, e.snaps);
|
||||
} else {
|
||||
e.op = Log::Entry::BACKLOG; // FIXME if/when we do smarter op codes!
|
||||
}
|
||||
@ -1786,13 +1786,16 @@ void PG::read_log(ObjectStore *store)
|
||||
|
||||
pobject_t poid(info.pgid.pool(), 0, i->oid);
|
||||
bufferlist bv;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
object_info_t oi;
|
||||
if (r >= 0)
|
||||
oi.decode(bv);
|
||||
if (r < 0 || oi.version < i->version) {
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, OI_ATTR, bv);
|
||||
if (r >= 0) {
|
||||
object_info_t oi(bv);
|
||||
if (oi.version < i->version) {
|
||||
dout(15) << "read_log missing " << *i << " (have " << oi.version << ")" << dendl;
|
||||
missing.add(i->oid, i->version, oi.version);
|
||||
}
|
||||
} else {
|
||||
dout(15) << "read_log missing " << *i << dendl;
|
||||
missing.add(i->oid, i->version, oi.version);
|
||||
missing.add(i->oid, i->version, eversion_t());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -447,21 +447,21 @@ bool ReplicatedPG::snap_trimmer()
|
||||
|
||||
ObjectStore::Transaction t;
|
||||
|
||||
// load clone snap list
|
||||
// load clone info
|
||||
bufferlist bl;
|
||||
osd->store->getattr(info.pgid.to_coll(), coid, "snaps", bl);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
vector<snapid_t> snaps;
|
||||
::decode(snaps, blp);
|
||||
osd->store->getattr(info.pgid.to_coll(), coid, OI_ATTR, bl);
|
||||
object_info_t coi(bl);
|
||||
|
||||
// load head snapset
|
||||
// load head info
|
||||
pobject_t head = coid;
|
||||
head.oid.snap = CEPH_NOSNAP;
|
||||
bl.clear();
|
||||
osd->store->getattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
blp = bl.begin();
|
||||
SnapSet snapset;
|
||||
::decode(snapset, blp);
|
||||
osd->store->getattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
object_info_t hoi(bl);
|
||||
|
||||
vector<snapid_t>& snaps = coi.snaps;
|
||||
SnapSet& snapset = hoi.snapset;
|
||||
|
||||
dout(10) << coid << " old head " << head << " snapset " << snapset << dendl;
|
||||
|
||||
// remove snaps
|
||||
@ -508,9 +508,10 @@ bool ReplicatedPG::snap_trimmer()
|
||||
} else {
|
||||
// save adjusted snaps for this object
|
||||
dout(10) << coid << " snaps " << snaps << " -> " << newsnaps << dendl;
|
||||
coi.snaps.swap(newsnaps);
|
||||
bl.clear();
|
||||
::encode(newsnaps, bl);
|
||||
t.setattr(info.pgid.to_coll(), coid, "snaps", bl);
|
||||
::encode(coi, bl);
|
||||
t.setattr(info.pgid.to_coll(), coid, OI_ATTR, bl);
|
||||
|
||||
if (snaps[0] != newsnaps[0]) {
|
||||
t.collection_remove(info.pgid.to_snap_coll(snaps[0]), coid);
|
||||
@ -532,8 +533,8 @@ bool ReplicatedPG::snap_trimmer()
|
||||
info.stats.num_objects--;
|
||||
} else {
|
||||
bl.clear();
|
||||
::encode(snapset, bl);
|
||||
t.setattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
::encode(hoi, bl);
|
||||
t.setattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
}
|
||||
|
||||
osd->store->apply_transaction(t);
|
||||
@ -577,30 +578,26 @@ int ReplicatedPG::pick_read_snap(pobject_t& poid)
|
||||
pobject_t head = poid;
|
||||
head.oid.snap = CEPH_NOSNAP;
|
||||
|
||||
SnapSet snapset;
|
||||
{
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
if (r < 0)
|
||||
return -ENOENT; // if head doesn't exist, no snapped version will either.
|
||||
bufferlist::iterator p = bl.begin();
|
||||
::decode(snapset, p);
|
||||
}
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
if (r < 0)
|
||||
return -ENOENT; // if head doesn't exist, no snapped version will either.
|
||||
object_info_t hoi(bl);
|
||||
|
||||
dout(10) << "pick_read_snap " << poid << " snapset " << snapset << dendl;
|
||||
dout(10) << "pick_read_snap " << poid << " snapset " << hoi.snapset << dendl;
|
||||
snapid_t want = poid.oid.snap;
|
||||
|
||||
// head?
|
||||
if (want > snapset.seq) {
|
||||
if (snapset.head_exists) {
|
||||
if (want > hoi.snapset.seq) {
|
||||
if (hoi.snapset.head_exists) {
|
||||
dout(10) << "pick_read_snap " << head
|
||||
<< " want " << want << " > snapset seq " << snapset.seq
|
||||
<< " want " << want << " > snapset seq " << hoi.snapset.seq
|
||||
<< " -- HIT" << dendl;
|
||||
poid = head;
|
||||
return 0;
|
||||
} else {
|
||||
dout(10) << "pick_read_snap " << head
|
||||
<< " want " << want << " > snapset seq " << snapset.seq
|
||||
<< " want " << want << " > snapset seq " << hoi.snapset.seq
|
||||
<< " but head_exists = false -- DNE" << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
@ -608,36 +605,34 @@ int ReplicatedPG::pick_read_snap(pobject_t& poid)
|
||||
|
||||
// which clone would it be?
|
||||
unsigned k = 0;
|
||||
while (k<snapset.clones.size() && snapset.clones[k] < want)
|
||||
while (k<hoi.snapset.clones.size() && hoi.snapset.clones[k] < want)
|
||||
k++;
|
||||
if (k == snapset.clones.size()) {
|
||||
if (k == hoi.snapset.clones.size()) {
|
||||
dout(10) << "pick_read_snap no clones with last >= want " << want << " -- DNE" << dendl;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
// check clone
|
||||
poid.oid.snap = snapset.clones[k];
|
||||
poid.oid.snap = hoi.snapset.clones[k];
|
||||
|
||||
if (missing.is_missing(poid.oid)) {
|
||||
dout(20) << "pick_read_snap " << poid << " missing, try again later" << dendl;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
vector<snapid_t> snaps;
|
||||
{
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, "snaps", bl);
|
||||
if (r < 0) {
|
||||
dout(20) << "pick_read_snap " << poid << " dne, wtf" << dendl;
|
||||
assert(0);
|
||||
return -ENOENT;
|
||||
}
|
||||
bufferlist::iterator p = bl.begin();
|
||||
::decode(snaps, p);
|
||||
// clone
|
||||
bl.clear();
|
||||
r = osd->store->getattr(info.pgid.to_coll(), poid, OI_ATTR, bl);
|
||||
if (r < 0) {
|
||||
dout(20) << "pick_read_snap " << poid << " dne, wtf" << dendl;
|
||||
assert(0);
|
||||
return -ENOENT;
|
||||
}
|
||||
dout(20) << "pick_read_snap " << poid << " snaps " << snaps << dendl;
|
||||
snapid_t first = snaps[snaps.size()-1];
|
||||
snapid_t last = snaps[0];
|
||||
object_info_t coi(bl);
|
||||
|
||||
dout(20) << "pick_read_snap " << poid << " snaps " << coi.snaps << dendl;
|
||||
snapid_t first = coi.snaps[coi.snaps.size()-1];
|
||||
snapid_t last = coi.snaps[0];
|
||||
if (first <= want) {
|
||||
dout(20) << "pick_read_snap " << poid << " [" << first << "," << last << "] contains " << want << " -- HIT" << dendl;
|
||||
return 0;
|
||||
@ -727,8 +722,7 @@ void ReplicatedPG::op_read(MOSDOp *op)
|
||||
__u32 cur = 0;
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, "inc_lock", b);
|
||||
if (b.length()) {
|
||||
bufferlist::iterator bp = b.begin();
|
||||
::decode(cur, bp);
|
||||
::decode(cur, b);
|
||||
if (cur > op->get_inc_lock()) {
|
||||
dout(10) << " inc_lock " << cur << " > " << op->get_inc_lock()
|
||||
<< " on " << poid << dendl;
|
||||
@ -825,56 +819,52 @@ void ReplicatedPG::op_read(MOSDOp *op)
|
||||
|
||||
void ReplicatedPG::_make_clone(ObjectStore::Transaction& t,
|
||||
pobject_t head, pobject_t coid,
|
||||
eversion_t ov, eversion_t v, osd_reqid_t& reqid, bufferlist& snapsbl)
|
||||
eversion_t ov, eversion_t v, osd_reqid_t& reqid, vector<snapid_t>& snaps)
|
||||
{
|
||||
t.clone(info.pgid.to_coll(), head, coid);
|
||||
t.setattr(info.pgid.to_coll(), coid, "snaps", snapsbl);
|
||||
|
||||
object_info_t pi;
|
||||
object_info_t pi(coid);
|
||||
pi.version = v;
|
||||
pi.prior_version = ov;
|
||||
pi.last_reqid = reqid;
|
||||
pi.mtime = g_clock.now();
|
||||
bufferlist bv(sizeof(pi));
|
||||
pi.snaps.swap(snaps);
|
||||
bufferlist bv;
|
||||
::encode(pi, bv);
|
||||
t.setattr(info.pgid.to_coll(), coid, "oi", bv);
|
||||
|
||||
t.clone(info.pgid.to_coll(), head, coid);
|
||||
t.setattr(info.pgid.to_coll(), coid, OI_ATTR, bv);
|
||||
}
|
||||
|
||||
void ReplicatedPG::prepare_clone(ObjectStore::Transaction& t, bufferlist& logbl, osd_reqid_t reqid, pg_stat_t& stats,
|
||||
pobject_t poid, loff_t old_size,
|
||||
eversion_t old_version, eversion_t& at_version,
|
||||
SnapSet& snapset, SnapContext& snapc)
|
||||
pobject_t poid, loff_t old_size, object_info_t& oi,
|
||||
eversion_t& at_version, SnapContext& snapc)
|
||||
{
|
||||
// clone?
|
||||
assert(poid.oid.snap == CEPH_NOSNAP);
|
||||
dout(20) << "snapset=" << snapset << " snapc=" << snapc << dendl;;
|
||||
dout(20) << "snapset=" << oi.snapset << " snapc=" << snapc << dendl;;
|
||||
|
||||
// use newer snapc?
|
||||
if (snapset.seq > snapc.seq) {
|
||||
snapc.seq = snapset.seq;
|
||||
snapc.snaps = snapset.snaps;
|
||||
if (oi.snapset.seq > snapc.seq) {
|
||||
snapc.seq = oi.snapset.seq;
|
||||
snapc.snaps = oi.snapset.snaps;
|
||||
dout(10) << " using newer snapc " << snapc << dendl;
|
||||
}
|
||||
|
||||
if (snapset.head_exists && // head exists
|
||||
if (oi.snapset.head_exists && // head exists
|
||||
snapc.snaps.size() && // there are snaps
|
||||
snapc.snaps[0] > snapset.seq) { // existing object is old
|
||||
snapc.snaps[0] > oi.snapset.seq) { // existing object is old
|
||||
// clone
|
||||
pobject_t coid = poid;
|
||||
coid.oid.snap = snapc.seq;
|
||||
|
||||
unsigned l;
|
||||
for (l=1; l<snapc.snaps.size() && snapc.snaps[l] > snapset.seq; l++) ;
|
||||
for (l=1; l<snapc.snaps.size() && snapc.snaps[l] > oi.snapset.seq; l++) ;
|
||||
|
||||
vector<snapid_t> snaps(l);
|
||||
for (unsigned i=0; i<l; i++)
|
||||
snaps[i] = snapc.snaps[i];
|
||||
|
||||
bufferlist snapsbl;
|
||||
::encode(snaps, snapsbl);
|
||||
|
||||
// prepare clone
|
||||
_make_clone(t, poid, coid, old_version, at_version, reqid, snapsbl);
|
||||
_make_clone(t, poid, coid, oi.version, at_version, reqid, snaps);
|
||||
|
||||
// add to snap bound collections
|
||||
coll_t fc = make_snap_collection(t, snaps[0]);
|
||||
@ -886,24 +876,24 @@ void ReplicatedPG::prepare_clone(ObjectStore::Transaction& t, bufferlist& logbl,
|
||||
|
||||
stats.num_objects++;
|
||||
stats.num_object_clones++;
|
||||
snapset.clones.push_back(coid.oid.snap);
|
||||
snapset.clone_size[coid.oid.snap] = old_size;
|
||||
snapset.clone_overlap[coid.oid.snap].insert(0, old_size);
|
||||
oi.snapset.clones.push_back(coid.oid.snap);
|
||||
oi.snapset.clone_size[coid.oid.snap] = old_size;
|
||||
oi.snapset.clone_overlap[coid.oid.snap].insert(0, old_size);
|
||||
|
||||
// log clone
|
||||
dout(10) << "cloning v " << old_version
|
||||
dout(10) << "cloning v " << oi.version
|
||||
<< " to " << coid << " v " << at_version
|
||||
<< " snaps=" << snaps << dendl;
|
||||
Log::Entry cloneentry(PG::Log::Entry::CLONE, coid.oid, at_version, old_version, reqid);
|
||||
cloneentry.snaps = snapsbl;
|
||||
Log::Entry cloneentry(PG::Log::Entry::CLONE, coid.oid, at_version, oi.version, reqid);
|
||||
::encode(snaps, cloneentry.snaps);
|
||||
add_log_entry(cloneentry, logbl);
|
||||
|
||||
at_version.version++;
|
||||
}
|
||||
|
||||
// update snapset with latest snap context
|
||||
snapset.seq = snapc.seq;
|
||||
snapset.snaps = snapc.snaps;
|
||||
oi.snapset.seq = snapc.seq;
|
||||
oi.snapset.snaps = snapc.snaps;
|
||||
}
|
||||
|
||||
|
||||
@ -1153,10 +1143,8 @@ int ReplicatedPG::prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t req
|
||||
void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t reqid,
|
||||
pobject_t poid,
|
||||
vector<ceph_osd_op>& ops, bufferlist& bl,
|
||||
bool& exists, __u64& size,
|
||||
object_info_t& oi,
|
||||
eversion_t at_version,
|
||||
SnapSet& snapset, SnapContext& snapc,
|
||||
bool& exists, __u64& size, object_info_t& oi,
|
||||
eversion_t at_version, SnapContext& snapc,
|
||||
__u32 inc_lock, eversion_t trim_to)
|
||||
{
|
||||
bufferlist log_bl;
|
||||
@ -1172,13 +1160,13 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t
|
||||
// clone?
|
||||
if (!did_snap && poid.oid.snap &&
|
||||
!ceph_osd_op_type_lock(ops[i].op)) { // is a (non-lock) modification
|
||||
prepare_clone(t, log_bl, reqid, info.stats, poid, size, old_version, at_version,
|
||||
snapset, snapc);
|
||||
prepare_clone(t, log_bl, reqid, info.stats, poid, size, oi,
|
||||
at_version, snapc);
|
||||
did_snap = true;
|
||||
}
|
||||
prepare_simple_op(t, reqid, info.stats, poid, size, exists,
|
||||
ops[i], bp,
|
||||
snapset, snapc);
|
||||
oi.snapset, snapc);
|
||||
}
|
||||
|
||||
// finish.
|
||||
@ -1194,13 +1182,10 @@ void ReplicatedPG::prepare_transaction(ObjectStore::Transaction& t, osd_reqid_t
|
||||
oi.prior_version = old_version;
|
||||
oi.last_reqid = reqid;
|
||||
oi.mtime = g_clock.now();
|
||||
|
||||
bufferlist bv(sizeof(oi));
|
||||
::encode(oi, bv);
|
||||
t.setattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
|
||||
bufferlist snapsetbl;
|
||||
::encode(snapset, snapsetbl);
|
||||
t.setattr(info.pgid.to_coll(), poid, "snapset", snapsetbl);
|
||||
t.setattr(info.pgid.to_coll(), poid, OI_ATTR, bv);
|
||||
}
|
||||
|
||||
// append to log
|
||||
@ -1408,7 +1393,7 @@ void ReplicatedPG::issue_repop(RepGather *repop, int dest, utime_t now)
|
||||
wr->old_exists = repop->pinfo->exists;
|
||||
wr->old_size = repop->pinfo->size;
|
||||
wr->old_version = repop->pinfo->oi.version;
|
||||
wr->snapset = repop->pinfo->snapset;
|
||||
wr->snapset = repop->pinfo->oi.snapset;
|
||||
wr->snapc = repop->snapc;
|
||||
wr->get_data() = repop->op->get_data(); // _copy_ bufferlist
|
||||
if (is_complete_pg())
|
||||
@ -1498,11 +1483,9 @@ ReplicatedPG::ProjectedObjectInfo *ReplicatedPG::get_projected_object(pobject_t
|
||||
return pinfo; // already had it
|
||||
}
|
||||
|
||||
dout(10) << "get_projected_object " << poid << " reading from disk" << dendl;
|
||||
|
||||
// pull info off disk
|
||||
pinfo->poid = poid;
|
||||
|
||||
|
||||
struct stat st;
|
||||
int r = osd->store->stat(info.pgid.to_coll(), poid, &st);
|
||||
if (r == 0) {
|
||||
@ -1510,24 +1493,16 @@ ReplicatedPG::ProjectedObjectInfo *ReplicatedPG::get_projected_object(pobject_t
|
||||
pinfo->size = st.st_size;
|
||||
|
||||
bufferlist bv;
|
||||
r = osd->store->getattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
r = osd->store->getattr(info.pgid.to_coll(), poid, OI_ATTR, bv);
|
||||
assert(r >= 0);
|
||||
pinfo->oi.decode(bv);
|
||||
|
||||
if (poid.oid.snap == CEPH_NOSNAP) {
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, "snapset", bl);
|
||||
if (r >= 0) {
|
||||
bufferlist::iterator p = bl.begin();
|
||||
::decode(pinfo->snapset, p);
|
||||
}
|
||||
} else
|
||||
assert(poid.oid.snap == 0); // no snapshotting.
|
||||
} else {
|
||||
pinfo->oi.poid = poid;
|
||||
pinfo->exists = false;
|
||||
pinfo->size = 0;
|
||||
}
|
||||
|
||||
dout(10) << "get_projected_object " << poid << " read " << pinfo->oi << dendl;
|
||||
return pinfo;
|
||||
}
|
||||
|
||||
@ -1565,9 +1540,8 @@ void ReplicatedPG::op_modify(MOSDOp *op)
|
||||
bufferlist b;
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, "inc_lock", b);
|
||||
if (b.length()) {
|
||||
bufferlist::iterator bp = b.begin();
|
||||
__u32 cur = 0;
|
||||
::decode(cur, bp);
|
||||
::decode(cur, b);
|
||||
if (cur > op->get_inc_lock()) {
|
||||
dout(10) << " inc_lock " << cur << " > " << op->get_inc_lock()
|
||||
<< " on " << poid << dendl;
|
||||
@ -1646,13 +1620,13 @@ void ReplicatedPG::op_modify(MOSDOp *op)
|
||||
<< " " << poid.oid
|
||||
<< " ov " << pinfo->oi.version << " av " << at_version
|
||||
<< " snapc " << snapc
|
||||
<< " snapset " << pinfo->snapset
|
||||
<< " snapset " << pinfo->oi.snapset
|
||||
<< dendl;
|
||||
|
||||
// verify snap ordering
|
||||
if ((op->get_flags() & CEPH_OSD_OP_ORDERSNAP) &&
|
||||
snapc.seq < pinfo->snapset.seq) {
|
||||
dout(10) << " ORDERSNAP flag set and snapc seq " << snapc.seq << " < snapset seq " << pinfo->snapset.seq
|
||||
snapc.seq < pinfo->oi.snapset.seq) {
|
||||
dout(10) << " ORDERSNAP flag set and snapc seq " << snapc.seq << " < snapset seq " << pinfo->oi.snapset.seq
|
||||
<< " on " << poid << dendl;
|
||||
osd->reply_op_error(op, -EOLDSNAPC);
|
||||
return;
|
||||
@ -1694,8 +1668,8 @@ void ReplicatedPG::op_modify(MOSDOp *op)
|
||||
if (!noop) {
|
||||
// log and update later.
|
||||
prepare_transaction(repop->t, op->get_reqid(), poid, op->ops, op->get_data(),
|
||||
pinfo->exists, pinfo->size, pinfo->oi, at_version,
|
||||
pinfo->snapset, snapc,
|
||||
pinfo->exists, pinfo->size, pinfo->oi,
|
||||
at_version, snapc,
|
||||
op->get_inc_lock(), trim_to);
|
||||
}
|
||||
|
||||
@ -1807,13 +1781,13 @@ void ReplicatedPG::sub_op_modify(MOSDSubOp *op)
|
||||
osd->logger->inc("r_wrb", op->get_data().length());
|
||||
|
||||
if (!op->noop) {
|
||||
object_info_t oi;
|
||||
object_info_t oi(op->poid);
|
||||
oi.version = op->old_version;
|
||||
oi.snapset = op->snapset;
|
||||
prepare_transaction(t, op->reqid,
|
||||
op->poid, op->ops, op->get_data(),
|
||||
op->old_exists, op->old_size, oi, op->version,
|
||||
op->snapset, op->snapc,
|
||||
op->inc_lock, op->pg_trim_to);
|
||||
op->snapc, op->inc_lock, op->pg_trim_to);
|
||||
}
|
||||
|
||||
C_OSD_RepModifyCommit *oncommit = new C_OSD_RepModifyCommit(this, op, ackerosd, info.last_complete);
|
||||
@ -2033,14 +2007,12 @@ bool ReplicatedPG::pull(pobject_t poid)
|
||||
|
||||
// check snapset
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
assert(r >= 0);
|
||||
SnapSet snapset;
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
::decode(snapset, blp);
|
||||
dout(10) << " snapset " << snapset << dendl;
|
||||
object_info_t oi(bl);
|
||||
dout(10) << " snapset " << oi.snapset << dendl;
|
||||
|
||||
calc_clone_subsets(snapset, poid, missing,
|
||||
calc_clone_subsets(oi.snapset, poid, missing,
|
||||
data_subset, clone_subsets);
|
||||
// FIXME: this may overestimate if we are pulling multiple clones in parallel...
|
||||
dout(10) << " pulling " << data_subset << ", will clone " << clone_subsets
|
||||
@ -2087,13 +2059,13 @@ void ReplicatedPG::push_to_replica(pobject_t poid, int peer)
|
||||
map<pobject_t, interval_set<__u64> > clone_subsets;
|
||||
interval_set<__u64> data_subset;
|
||||
|
||||
bufferlist bv;
|
||||
r = osd->store->getattr(info.pgid.to_coll(), poid, OI_ATTR, bv);
|
||||
assert(r >= 0);
|
||||
object_info_t oi(bv);
|
||||
|
||||
// are we doing a clone on the replica?
|
||||
if (poid.oid.snap && poid.oid.snap < CEPH_NOSNAP) {
|
||||
bufferlist bv;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
assert(r >= 0);
|
||||
object_info_t oi(bv);
|
||||
|
||||
pobject_t head = poid;
|
||||
head.oid.snap = CEPH_NOSNAP;
|
||||
if (peer_missing[peer].is_missing(head.oid) &&
|
||||
@ -2116,27 +2088,18 @@ void ReplicatedPG::push_to_replica(pobject_t poid, int peer)
|
||||
}
|
||||
|
||||
bufferlist bl;
|
||||
r = osd->store->getattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
r = osd->store->getattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
assert(r >= 0);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
SnapSet snapset;
|
||||
::decode(snapset, blp);
|
||||
dout(15) << "push_to_replica head snapset is " << snapset << dendl;
|
||||
object_info_t hoi(bl);
|
||||
dout(15) << "push_to_replica head snapset is " << hoi.snapset << dendl;
|
||||
|
||||
calc_clone_subsets(snapset, poid, peer_missing[peer],
|
||||
calc_clone_subsets(hoi.snapset, poid, peer_missing[peer],
|
||||
data_subset, clone_subsets);
|
||||
} else {
|
||||
// pushing head.
|
||||
// base this on partially on replica's clones?
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), poid, "snapset", bl);
|
||||
assert(r >= 0);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
SnapSet snapset;
|
||||
::decode(snapset, blp);
|
||||
dout(15) << "push_to_replica head snapset is " << snapset << dendl;
|
||||
|
||||
calc_head_subsets(snapset, poid, peer_missing[peer], data_subset, clone_subsets);
|
||||
dout(15) << "push_to_replica head snapset is " << oi.snapset << dendl;
|
||||
calc_head_subsets(oi.snapset, poid, peer_missing[peer], data_subset, clone_subsets);
|
||||
}
|
||||
|
||||
dout(10) << "push_to_replica " << poid << " pushing " << data_subset
|
||||
@ -2180,11 +2143,13 @@ void ReplicatedPG::push(pobject_t poid, int peer,
|
||||
osd->store->read(info.pgid.to_coll(), poid, 0, 0, bl);
|
||||
size = bl.length();
|
||||
}
|
||||
bufferlist bv;
|
||||
osd->store->getattr(info.pgid.to_coll(), poid, "oi", bv);
|
||||
object_info_t oi(bv);
|
||||
|
||||
osd->store->getattrs(info.pgid.to_coll(), poid, attrset);
|
||||
|
||||
bufferlist bv;
|
||||
bv.push_back(attrset[OI_ATTR]);
|
||||
object_info_t oi(bv);
|
||||
|
||||
// ok
|
||||
dout(7) << "push " << poid << " v " << oi.version
|
||||
<< " size " << size
|
||||
@ -2315,16 +2280,14 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op)
|
||||
assert(!missing.is_missing(head.oid));
|
||||
|
||||
bufferlist bl;
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, "snapset", bl);
|
||||
int r = osd->store->getattr(info.pgid.to_coll(), head, OI_ATTR, bl);
|
||||
assert(r >= 0);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
SnapSet snapset;
|
||||
::decode(snapset, blp);
|
||||
object_info_t hoi(bl);
|
||||
|
||||
clone_subsets.clear(); // forget what pusher said; recalculate cloning.
|
||||
|
||||
interval_set<__u64> data_needed;
|
||||
calc_clone_subsets(snapset, poid, missing, data_needed, clone_subsets);
|
||||
calc_clone_subsets(hoi.snapset, poid, missing, data_needed, clone_subsets);
|
||||
|
||||
dout(10) << "sub_op_push need " << data_needed << ", got " << data_subset << dendl;
|
||||
if (!data_needed.subset_of(data_subset)) {
|
||||
@ -2400,17 +2363,15 @@ void ReplicatedPG::sub_op_push(MOSDSubOp *op)
|
||||
|
||||
t.setattrs(info.pgid.to_coll(), poid, op->attrset);
|
||||
if (poid.oid.snap && poid.oid.snap != CEPH_NOSNAP &&
|
||||
op->attrset.count("snaps")) {
|
||||
op->attrset.count(OI_ATTR)) {
|
||||
bufferlist bl;
|
||||
bl.push_back(op->attrset["snaps"]);
|
||||
vector<snapid_t> snaps;
|
||||
bufferlist::iterator p = bl.begin();
|
||||
::decode(snaps, p);
|
||||
if (snaps.size()) {
|
||||
coll_t lc = make_snap_collection(t, snaps[0]);
|
||||
bl.push_back(op->attrset[OI_ATTR]);
|
||||
object_info_t oi(bl);
|
||||
if (oi.snaps.size()) {
|
||||
coll_t lc = make_snap_collection(t, oi.snaps[0]);
|
||||
t.collection_add(lc, info.pgid.to_coll(), poid);
|
||||
if (snaps.size() > 1) {
|
||||
coll_t hc = make_snap_collection(t, snaps[snaps.size()-1]);
|
||||
if (oi.snaps.size() > 1) {
|
||||
coll_t hc = make_snap_collection(t, oi.snaps[oi.snaps.size()-1]);
|
||||
t.collection_add(hc, info.pgid.to_coll(), poid);
|
||||
}
|
||||
}
|
||||
@ -2647,9 +2608,10 @@ int ReplicatedPG::recover_primary(int max)
|
||||
dout(10) << "recover_primary cloning " << head << " v" << latest->prior_version
|
||||
<< " to " << poid << " v" << latest->version
|
||||
<< " snaps " << latest->snaps << dendl;
|
||||
vector<snapid_t> snaps;
|
||||
::decode(snaps, latest->snaps);
|
||||
ObjectStore::Transaction t;
|
||||
_make_clone(t, head, poid, latest->prior_version, latest->version, latest->reqid,
|
||||
latest->snaps);
|
||||
_make_clone(t, head, poid, latest->prior_version, latest->version, latest->reqid, snaps);
|
||||
osd->store->apply_transaction(t);
|
||||
missing.got(latest->oid, latest->version);
|
||||
missing_loc.erase(latest->oid);
|
||||
@ -2859,15 +2821,17 @@ int ReplicatedPG::_scrub(ScrubMap& scrubmap)
|
||||
stat.num_objects++;
|
||||
|
||||
// basic checks.
|
||||
if (p->attrs.count("oi") == 0) {
|
||||
dout(0) << "scrub no 'oi' attr on " << poid << dendl;
|
||||
if (p->attrs.count(OI_ATTR) == 0) {
|
||||
dout(0) << "scrub no '" << OI_ATTR << "' attr on " << poid << dendl;
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
bufferlist bv;
|
||||
bv.push_back(p->attrs["oi"]);
|
||||
bv.push_back(p->attrs[OI_ATTR]);
|
||||
object_info_t oi(bv);
|
||||
|
||||
dout(20) << "scrub " << poid << " " << oi << dendl;
|
||||
|
||||
stat.num_bytes += p->size;
|
||||
stat.num_kb += SHIFT_ROUND_UP(p->size, 10);
|
||||
|
||||
@ -2884,16 +2848,7 @@ int ReplicatedPG::_scrub(ScrubMap& scrubmap)
|
||||
errors++;
|
||||
}
|
||||
|
||||
bufferlist bl;
|
||||
if (p->attrs.count("snapset") == 0) {
|
||||
dout(0) << "no 'snapset' attr on " << p->poid << dendl;
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
bl.push_back(p->attrs["snapset"]);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
::decode(snapset, blp);
|
||||
dout(20) << "scrub " << poid << " snapset " << snapset << dendl;
|
||||
snapset = oi.snapset;
|
||||
if (!snapset.head_exists)
|
||||
assert(p->size == 0); // make sure object is 0-sized.
|
||||
|
||||
@ -2922,24 +2877,6 @@ int ReplicatedPG::_scrub(ScrubMap& scrubmap)
|
||||
stat.num_object_clones++;
|
||||
|
||||
assert(poid.oid.snap == snapset.clones[curclone]);
|
||||
bufferlist bl;
|
||||
if (p->attrs.count("snaps") == 0) {
|
||||
dout(0) << "no 'snaps' attr on " << p->poid << dendl;
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
bl.push_back(p->attrs["snaps"]);
|
||||
bufferlist::iterator blp = bl.begin();
|
||||
vector<snapid_t> snaps;
|
||||
::decode(snaps, blp);
|
||||
|
||||
eversion_t from;
|
||||
if (p->attrs.count("from_version") == 0) {
|
||||
dout(0) << "no 'from_version' attr on " << p->poid << dendl;
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
p->attrs["from_version"].copy_out(0, sizeof(from), (char *)&from);
|
||||
|
||||
assert(p->size == snapset.clone_size[curclone]);
|
||||
|
||||
|
@ -34,13 +34,12 @@ public:
|
||||
int ref;
|
||||
pobject_t poid;
|
||||
|
||||
SnapSet snapset;
|
||||
bool exists;
|
||||
__u64 size;
|
||||
|
||||
object_info_t oi;
|
||||
|
||||
ProjectedObjectInfo() : ref(0), exists(false), size(0) {}
|
||||
ProjectedObjectInfo() : ref(0), exists(false), size(0), oi(poid) {}
|
||||
};
|
||||
|
||||
|
||||
@ -179,11 +178,10 @@ protected:
|
||||
|
||||
void _make_clone(ObjectStore::Transaction& t,
|
||||
pobject_t head, pobject_t coid,
|
||||
eversion_t ov, eversion_t v, osd_reqid_t& reqid, bufferlist& snaps);
|
||||
eversion_t ov, eversion_t v, osd_reqid_t& reqid, vector<snapid_t>& snaps);
|
||||
void prepare_clone(ObjectStore::Transaction& t, bufferlist& logbl, osd_reqid_t reqid, pg_stat_t& st,
|
||||
pobject_t poid, loff_t old_size,
|
||||
eversion_t old_version, eversion_t& at_version,
|
||||
SnapSet& snapset, SnapContext& snapc);
|
||||
pobject_t poid, loff_t old_size, object_info_t& oi,
|
||||
eversion_t& at_version, SnapContext& snapc);
|
||||
void add_interval_usage(interval_set<__u64>& s, pg_stat_t& st);
|
||||
int prepare_simple_op(ObjectStore::Transaction& t, osd_reqid_t reqid, pg_stat_t& st,
|
||||
pobject_t poid, __u64& old_size, bool& exists,
|
||||
@ -193,8 +191,7 @@ protected:
|
||||
pobject_t poid,
|
||||
vector<ceph_osd_op>& ops, bufferlist& bl,
|
||||
bool& exists, __u64& size, object_info_t& oi,
|
||||
eversion_t at_version,
|
||||
SnapSet& snapset, SnapContext& snapc,
|
||||
eversion_t at_version, SnapContext& snapc,
|
||||
__u32 inc_lock, eversion_t trim_to);
|
||||
|
||||
friend class C_OSD_ModifyCommit;
|
||||
|
@ -280,37 +280,6 @@ inline ostream& operator<<(ostream& out, const eversion_t e) {
|
||||
|
||||
|
||||
|
||||
struct object_info_t {
|
||||
eversion_t version, prior_version;
|
||||
osd_reqid_t last_reqid;
|
||||
utime_t mtime;
|
||||
|
||||
void encode(bufferlist& bl) const {
|
||||
::encode(version, bl);
|
||||
::encode(prior_version, bl);
|
||||
::encode(last_reqid, bl);
|
||||
::encode(mtime, bl);
|
||||
}
|
||||
void decode(bufferlist::iterator& bl) {
|
||||
::decode(version, bl);
|
||||
::decode(prior_version, bl);
|
||||
::decode(last_reqid, bl);
|
||||
::decode(mtime, bl);
|
||||
}
|
||||
void decode(bufferlist& bl) {
|
||||
bufferlist::iterator p = bl.begin();
|
||||
decode(p);
|
||||
}
|
||||
|
||||
object_info_t() {}
|
||||
object_info_t(bufferlist& bl) {
|
||||
decode(bl);
|
||||
}
|
||||
};
|
||||
WRITE_CLASS_ENCODER(object_info_t)
|
||||
|
||||
|
||||
|
||||
/** osd_stat
|
||||
* aggregate stats for an osd
|
||||
*/
|
||||
@ -745,6 +714,66 @@ inline ostream& operator<<(ostream& out, const SnapSet& cs) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define OI_ATTR "_"
|
||||
|
||||
struct object_info_t {
|
||||
pobject_t poid;
|
||||
|
||||
eversion_t version, prior_version;
|
||||
osd_reqid_t last_reqid;
|
||||
utime_t mtime;
|
||||
|
||||
SnapSet snapset; // [head]
|
||||
vector<snapid_t> snaps; // [clone]
|
||||
|
||||
void encode(bufferlist& bl) const {
|
||||
::encode(poid, bl);
|
||||
::encode(version, bl);
|
||||
::encode(prior_version, bl);
|
||||
::encode(last_reqid, bl);
|
||||
::encode(mtime, bl);
|
||||
if (poid.oid.snap == CEPH_NOSNAP)
|
||||
::encode(snapset, bl);
|
||||
else
|
||||
::encode(snaps, bl);
|
||||
}
|
||||
void decode(bufferlist::iterator& bl) {
|
||||
::decode(poid, bl);
|
||||
::decode(version, bl);
|
||||
::decode(prior_version, bl);
|
||||
::decode(last_reqid, bl);
|
||||
::decode(mtime, bl);
|
||||
if (poid.oid.snap == CEPH_NOSNAP)
|
||||
::decode(snapset, bl);
|
||||
else
|
||||
::decode(snaps, bl);
|
||||
}
|
||||
void decode(bufferlist& bl) {
|
||||
bufferlist::iterator p = bl.begin();
|
||||
decode(p);
|
||||
}
|
||||
|
||||
object_info_t(pobject_t p) : poid(p) {}
|
||||
object_info_t(bufferlist& bl) {
|
||||
decode(bl);
|
||||
}
|
||||
};
|
||||
WRITE_CLASS_ENCODER(object_info_t)
|
||||
|
||||
|
||||
inline ostream& operator<<(ostream& out, const object_info_t& oi) {
|
||||
out << oi.poid << "(" << oi.version
|
||||
<< " " << oi.last_reqid;
|
||||
if (oi.poid.oid.snap == CEPH_NOSNAP)
|
||||
out << " " << oi.snapset << ")";
|
||||
else
|
||||
out << " " << oi.snaps << ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* summarize pg contents for purposes of a scrub
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user