From 891f5192427a4a783d5d7194fc2556dfdc1a0ed2 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 30 Apr 2018 09:52:38 -0500
Subject: [PATCH] osdc/Objecter: fix recursive locking in _finish_command

The path

 #9 Objecter::_finish_command (this=this@entry=0x7f76c00aeb30, c=c@entry=0x7f76b0000b10, r=, rs="osd down") at /build/ceph-13.0.2-1932-g458b4fb/src/osdc/Objecter.cc:4950
 #10 0x00007f76d26de106 in Objecter::_check_command_map_dne (this=this@entry=0x7f76c00aeb30, c=c@entry=0x7f76b0000b10) at /build/ceph-13.0.2-1932-g458b4fb/src/osdc/Objecter.cc:1726
 #11 0x00007f76d26e52e4 in Objecter::_scan_requests (this=this@entry=0x7f76c00aeb30, s=0x7f76c00af8a0, skipped_map=skipped_map@entry=false, cluster_full=cluster_full@entry=false, pool_full_map=0x7f76be7fb330, need_resend=..., need_resend_linger=..., need_resend_command=std::map with 0 elements, sul=..., gap_removed_snaps=0x7f76ac0016f8) at /build/ceph-13.0.2-1932-g458b4fb/src/osdc/Objecter.cc:1120
 #12 0x00007f76d26eded5 in Objecter::handle_osd_map (this=this@entry=0x7f76c00aeb30, m=m@entry=0x7f76ac0014a0) at /build/ceph-13.0.2-1932-g458b4fb/src/osdc/Objecter.cc:1228

led to a recursive lock of the session mutex (locked in _scan_requests, and
again in _finish_command).

Fix by making the callers of _finish_command (and _check_command_map_dne)
take the session lock.

Fixes: http://tracker.ceph.com/issues/23940 Signed-off-by: Sage Weil --- src/osdc/Objecter.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index ba5a4e3dbfd..6f1ce3d640c 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1708,7 +1708,9 @@ void Objecter::C_Command_Map_Latest::finish(int r) if (c->map_dne_bound == 0) c->map_dne_bound = latest; + OSDSession::unique_lock sul(c->session->lock); objecter->_check_command_map_dne(c); + sul.unlock(); c->put(); } @@ -1716,6 +1718,7 @@ void Objecter::C_Command_Map_Latest::finish(int r) void Objecter::_check_command_map_dne(CommandOp *c) { // rwlock is locked unique + // session is locked unique ldout(cct, 10) << "_check_command_map_dne tid " << c->tid << " current " << osdmap->get_epoch() @@ -1733,6 +1736,7 @@ void Objecter::_check_command_map_dne(CommandOp *c) void Objecter::_send_command_map_check(CommandOp *c) { // rwlock is locked unique + // session is locked unique // ask the monitor if (check_latest_map_commands.count(c->tid) == 0) { @@ -4781,8 +4785,10 @@ void Objecter::handle_command_reply(MCommandReply *m) sl.unlock(); - + OSDSession::unique_lock sul(s->lock); _finish_command(c, m->r, m->rs); + sul.unlock(); + m->put(); s->put(); } @@ -4928,13 +4934,16 @@ int Objecter::command_op_cancel(OSDSession *s, ceph_tid_t tid, int r) CommandOp *op = it->second; _command_cancel_map_check(op); + OSDSession::unique_lock sl(op->session->lock); _finish_command(op, r, ""); + sl.unlock(); return 0; } void Objecter::_finish_command(CommandOp *c, int r, string rs) { // rwlock is locked unique + // session lock is locked ldout(cct, 10) << "_finish_command " << c->tid << " = " << r << " " << rs << dendl; @@ -4947,9 +4956,7 @@ void Objecter::_finish_command(CommandOp *c, int r, string rs) timer.cancel_event(c->ontimeout); OSDSession *s = c->session; - OSDSession::unique_lock sl(s->lock); _session_command_op_remove(c->session, c); - sl.unlock(); 
c->put();