From da3e4d9291c72783bdd6111dcf94af5db49a371d Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Wed, 9 Jun 2021 14:31:50 +0000 Subject: [PATCH] crimson/monc: don't serve auth requests without active mon connection. This is yet another race condition; it occurs when an auth request is handled while the `active_con` reset sequence is in progress. It caused the following `nullptr` dereference at Sepia: ``` DEBUG 2021-06-09 10:27:24,059 [shard 0] ms - [osd.6(client) v2:172.21.15.170:6809/33397 >> client.? -@39840] GOT AuthRequestFrame: method=2, preferred_modes={2, 1}, payload_len=174 /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-4977-g65cb255e/rpm/el8/BUILD/ceph-17.0.0-4977-g65cb255e/src/crimson/mon/MonClient.cc:595:26: runtime error: member call on null pointer of type 'struct Connection' /home/jenkins-build/build/workspace/ceph-dev-new-build/ARCH/x86_64/AVAILABLE_ARCH/x86_64/AVAILABLE_DIST/centos8/DIST/centos8/MACHINE_SIZE/gigantic/release/17.0.0-4977-g65cb255e/rpm/el8/BUILD/ceph-17.0.0-4977-g65cb255e/src/crimson/mon/MonClient.cc:178:11: runtime error: member access within null pointer of type 'struct Connection' Segmentation fault on shard 0. 
Backtrace: 0# 0x0000563F9C00395F in ceph-osd 1# FatalSignal::signaled(int, siginfo_t const*) in ceph-osd 2# FatalSignal::install_oneshot_signal_handler<11>()::{lambda(int, siginfo_t*, void*)#1}::_FUN(int, siginfo_t*, void*) in ceph-osd 3# 0x00007F4A064D0B20 in /lib64/libpthread.so.0 4# crimson::mon::Connection::get_keys() in ceph-osd 5# crimson::mon::Client::handle_auth_request(seastar::shared_ptr, seastar::lw_shared_ptr, bool, unsigned int, ceph::buffer::v15_2_0::list const&, ceph::buffer::v15_2_0::list*) in ceph-osd 6# crimson::net::ProtocolV2::_handle_auth_request(ceph::buffer::v15_2_0::list&, bool) in ceph-osd 7# 0x0000563F9D007B39 in ceph-osd 8# 0x0000563F9D008C45 in ceph-osd 9# 0x0000563F95FF8D70 in ceph-osd 10# 0x0000563FA1A560BF in ceph-osd 11# 0x0000563FA1A5B600 in ceph-osd 12# 0x0000563FA1C0D66B in ceph-osd 13# 0x0000563FA176B0EA in ceph-osd 14# 0x0000563FA177520E in ceph-osd 15# main in ceph-osd 16# __libc_start_main in /lib64/libc.so.6 17# _start in ceph-osd Fault at location: 0xb0 ``` Signed-off-by: Radoslaw Zarzynski --- src/crimson/mon/MonClient.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/crimson/mon/MonClient.cc b/src/crimson/mon/MonClient.cc index 64261865ce5..664ba9ce146 100644 --- a/src/crimson/mon/MonClient.cc +++ b/src/crimson/mon/MonClient.cc @@ -587,6 +587,11 @@ int Client::handle_auth_request(crimson::net::ConnectionRef con, logger().info("skipping challenge on {}", con); authorizer_challenge = nullptr; } + if (!active_con) { + logger().info("auth request during inactivity period"); + // let's instruct the client to come back later + return -EBUSY; + } bool was_challenge = (bool)auth_meta->authorizer_challenge; EntityName name; AuthCapsInfo caps_info;