mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
osd: maintain up_epoch AND boot_epoch; revise OSDSuperblock accordingly
In order to make the superblock clean interval meaningful after we are marked down and then up again (over the life of a single cosd process instance), we track both boot_epoch and up_epoch, and keep [boot_epoch,clean_thru] in the superblock. This avoids seeing crashed pgs when an osd is wrongly marked down and the osd marks itself up again.
This commit is contained in:
parent
18851ce0f5
commit
026166ff53
@ -223,7 +223,7 @@ OSD::OSD(int id, Messenger *m, Messenger *hbm, MonMap *mm, const char *dev, cons
|
||||
logclient(messenger, monmap),
|
||||
whoami(id),
|
||||
dev_path(dev), journal_path(jdev),
|
||||
state(STATE_BOOTING), boot_epoch(0),
|
||||
state(STATE_BOOTING), boot_epoch(0), up_epoch(0),
|
||||
op_tp("OSD::op_tp", g_conf.osd_maxthreads),
|
||||
recovery_tp("OSD::recovery_tp", 1),
|
||||
disk_tp("OSD::disk_tp", 2),
|
||||
@ -467,8 +467,8 @@ int OSD::shutdown()
|
||||
|
||||
// note unmount epoch
|
||||
dout(10) << "noting clean unmount in epoch " << osdmap->get_epoch() << dendl;
|
||||
superblock.epoch_mounted = boot_epoch;
|
||||
superblock.epoch_unmounted = osdmap->get_epoch();
|
||||
superblock.mounted = boot_epoch;
|
||||
superblock.clean_thru = osdmap->get_epoch();
|
||||
ObjectStore::Transaction t;
|
||||
write_superblock(t);
|
||||
store->apply_transaction(t);
|
||||
@ -1915,16 +1915,17 @@ void OSD::handle_osd_map(MOSDMap *m)
|
||||
dout(0) << "map says i am down. switching to boot state." << dendl;
|
||||
//shutdown();
|
||||
|
||||
// note in the superblock that we were clean up until this point.
|
||||
superblock.epoch_mounted = boot_epoch;
|
||||
superblock.epoch_unmounted = osdmap->get_epoch();
|
||||
|
||||
state = STATE_BOOTING;
|
||||
boot_epoch = 0;
|
||||
up_epoch = 0;
|
||||
|
||||
reset_heartbeat_peers();
|
||||
}
|
||||
|
||||
// note in the superblock that we were clean thru the prior epoch
|
||||
if (boot_epoch && boot_epoch >= superblock.mounted) {
|
||||
superblock.mounted = boot_epoch;
|
||||
superblock.clean_thru = osdmap->get_epoch();
|
||||
}
|
||||
|
||||
// superblock and commit
|
||||
write_superblock(t);
|
||||
@ -1957,11 +1958,15 @@ void OSD::advance_map(ObjectStore::Transaction& t, interval_set<snapid_t>& remov
|
||||
<< " removed_snaps " << removed_snaps
|
||||
<< dendl;
|
||||
|
||||
if (!boot_epoch &&
|
||||
if (!up_epoch &&
|
||||
osdmap->is_up(whoami) &&
|
||||
osdmap->get_inst(whoami) == messenger->get_myinst()) {
|
||||
boot_epoch = osdmap->get_epoch();
|
||||
dout(10) << "my boot_epoch is " << boot_epoch << dendl;
|
||||
up_epoch = osdmap->get_epoch();
|
||||
dout(10) << "up_epoch is " << up_epoch << dendl;
|
||||
if (!boot_epoch) {
|
||||
boot_epoch = osdmap->get_epoch();
|
||||
dout(10) << "boot_epoch is " << boot_epoch << dendl;
|
||||
}
|
||||
}
|
||||
|
||||
// scan pg creations
|
||||
@ -2334,8 +2339,8 @@ bool OSD::require_same_or_newer_map(Message *m, epoch_t epoch)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (epoch < boot_epoch) {
|
||||
dout(7) << "from pre-boot epoch " << epoch << " < " << boot_epoch << dendl;
|
||||
if (epoch < up_epoch) {
|
||||
dout(7) << "from pre-up epoch " << epoch << " < " << up_epoch << dendl;
|
||||
delete m;
|
||||
return false;
|
||||
}
|
||||
@ -3535,8 +3540,8 @@ void OSD::handle_op(MOSDOp *op)
|
||||
void OSD::handle_sub_op(MOSDSubOp *op)
|
||||
{
|
||||
dout(10) << "handle_sub_op " << *op << " epoch " << op->map_epoch << dendl;
|
||||
if (op->map_epoch < boot_epoch) {
|
||||
dout(3) << "replica op from before boot" << dendl;
|
||||
if (op->map_epoch < up_epoch) {
|
||||
dout(3) << "replica op from before up" << dendl;
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
@ -3583,8 +3588,8 @@ void OSD::handle_sub_op(MOSDSubOp *op)
|
||||
}
|
||||
void OSD::handle_sub_op_reply(MOSDSubOpReply *op)
|
||||
{
|
||||
if (op->get_map_epoch() < boot_epoch) {
|
||||
dout(3) << "replica op reply from before boot" << dendl;
|
||||
if (op->get_map_epoch() < up_epoch) {
|
||||
dout(3) << "replica op reply from before up" << dendl;
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
|
@ -144,7 +144,8 @@ public:
|
||||
|
||||
private:
|
||||
int state;
|
||||
epoch_t boot_epoch;
|
||||
epoch_t boot_epoch; // _first_ epoch we were marked up (after this process started)
|
||||
epoch_t up_epoch; // _most_recent_ epoch we were marked up
|
||||
|
||||
public:
|
||||
bool is_booting() { return state == STATE_BOOTING; }
|
||||
|
@ -630,13 +630,14 @@ public:
|
||||
epoch_t oldest_map, newest_map; // oldest/newest maps we have.
|
||||
double weight;
|
||||
|
||||
epoch_t epoch_unmounted; // last epoch i cleanly unmounted
|
||||
epoch_t epoch_mounted; // ...and the epoch i originally mounted it
|
||||
// last interval over which i mounted and was then active
|
||||
epoch_t mounted; // last epoch i mounted
|
||||
epoch_t clean_thru; // epoch i was active and clean thru
|
||||
|
||||
OSDSuperblock() :
|
||||
whoami(-1),
|
||||
current_epoch(0), oldest_map(0), newest_map(0), weight(0),
|
||||
epoch_unmounted(0), epoch_mounted(0) {
|
||||
mounted(0), clean_thru(0) {
|
||||
memset(&fsid, 0, sizeof(fsid));
|
||||
}
|
||||
|
||||
@ -648,8 +649,8 @@ public:
|
||||
::encode(oldest_map, bl);
|
||||
::encode(newest_map, bl);
|
||||
::encode(weight, bl);
|
||||
::encode(epoch_unmounted, bl);
|
||||
::encode(epoch_mounted, bl);
|
||||
::encode(clean_thru, bl);
|
||||
::encode(mounted, bl);
|
||||
}
|
||||
void decode(bufferlist::iterator &bl) {
|
||||
::decode(magic, bl);
|
||||
@ -659,8 +660,8 @@ public:
|
||||
::decode(oldest_map, bl);
|
||||
::decode(newest_map, bl);
|
||||
::decode(weight, bl);
|
||||
::decode(epoch_unmounted, bl);
|
||||
::decode(epoch_mounted, bl);
|
||||
::decode(clean_thru, bl);
|
||||
::decode(mounted, bl);
|
||||
}
|
||||
};
|
||||
WRITE_CLASS_ENCODER(OSDSuperblock)
|
||||
@ -671,7 +672,7 @@ inline ostream& operator<<(ostream& out, OSDSuperblock& sb)
|
||||
<< " osd" << sb.whoami
|
||||
<< " e" << sb.current_epoch
|
||||
<< " [" << sb.oldest_map << "," << sb.newest_map << "]"
|
||||
<< " lci=[" << sb.epoch_mounted << "," << sb.epoch_unmounted << "]"
|
||||
<< " lci=[" << sb.mounted << "," << sb.clean_thru << "]"
|
||||
<< ")";
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user