mirror of
https://github.com/ceph/ceph
synced 2024-12-18 09:25:49 +00:00
Merge pull request #19796 from LiumxNL/fix-ooo-caused-con-reset
osd: fix out of order caused by letting old msg from down osd be processed Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
a7dc224536
@ -537,6 +537,12 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
// Return the map currently held in next_osdmap.
//
// pre_publish_lock is taken through Mutex::Locker before next_osdmap is
// read. When next_osdmap tests false, a default-constructed OSDMapRef is
// returned instead, so the result may be empty and callers must check it
// before dereferencing.
// NOTE(review): presumably next_osdmap is the pre-published "next" map the
// OSD is about to switch to -- confirm against the code that assigns it.
OSDMapRef get_next_osdmap() {
|
||||
// NOTE(review): Mutex::Locker looks like a scoped guard that releases
// pre_publish_lock when `l` goes out of scope -- confirm.
Mutex::Locker l(pre_publish_lock);
|
||||
// No next map recorded: hand back an empty reference.
if (!next_osdmap)
|
||||
return OSDMapRef();
|
||||
return next_osdmap;
|
||||
}
|
||||
|
||||
private:
|
||||
Mutex peer_map_epoch_lock;
|
||||
|
@ -5737,14 +5737,15 @@ bool PG::can_discard_replica_op(OpRequestRef& op)
|
||||
// connection to it when handling the new osdmap marking it down, and also
|
||||
// resets the messenger session when the replica reconnects. to avoid the
|
||||
// out-of-order replies, the messages from that replica should be discarded.
|
||||
if (osd->get_osdmap()->is_down(from))
|
||||
OSDMapRef next_map = osd->get_next_osdmap();
|
||||
if (next_map->is_down(from))
|
||||
return true;
|
||||
/* Mostly, this overlaps with the old_peering_msg
|
||||
* condition. An important exception is pushes
|
||||
* sent by replicas not in the acting set, since
|
||||
* if such a replica goes down it does not cause
|
||||
* a new interval. */
|
||||
if (get_osdmap()->get_down_at(from) >= m->map_epoch)
|
||||
if (next_map->get_down_at(from) >= m->map_epoch)
|
||||
return true;
|
||||
|
||||
// same pg?
|
||||
|
Loading…
Reference in New Issue
Block a user