From b3a129041a7ca29d99218fbd31218d800cb83906 Mon Sep 17 00:00:00 2001 From: Xiaowei Chen Date: Sun, 15 Nov 2015 21:40:29 -0500 Subject: [PATCH 1/2] osd: change mutex to spinlock to optimize thread context switch. Signed-off-by: Xiaowei Chen --- src/osd/OSD.cc | 50 ++++++++++++++++++++++++++++++++++---------------- src/osd/OSD.h | 10 +++++----- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index afe6ef50d0b..1c69f4025f6 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5601,9 +5601,9 @@ void OSD::ms_fast_preprocess(Message *m) MOSDMap *mm = static_cast(m); Session *s = static_cast(m->get_connection()->get_priv()); if (s) { - s->received_map_lock.Lock(); + s->received_map_lock.lock(); s->received_map_epoch = mm->get_last(); - s->received_map_lock.Unlock(); + s->received_map_lock.unlock(); s->put(); } } @@ -5817,9 +5817,9 @@ bool OSD::dispatch_op_fast(OpRequestRef& op, OSDMapRef& osdmap) Session *s = static_cast(op->get_req()-> get_connection()->get_priv()); if (s) { - s->received_map_lock.Lock(); + s->received_map_lock.lock(); epoch_t received_epoch = s->received_map_epoch; - s->received_map_lock.Unlock(); + s->received_map_lock.unlock(); if (received_epoch < msg_epoch) { osdmap_subscribe(msg_epoch, false); } @@ -8043,17 +8043,24 @@ public: void finish(ThreadPool::TPHandle& tp) { OSD::Session *session = static_cast( con->get_priv()); + epoch_t last_sent_epoch; if (session) { - session->sent_epoch_lock.Lock(); + session->sent_epoch_lock.lock(); + last_sent_epoch = session->last_sent_epoch; + session->sent_epoch_lock.unlock(); } osd->service.share_map( name, con.get(), map_epoch, osdmap, - session ? &session->last_sent_epoch : NULL); + session ? &last_sent_epoch : NULL); if (session) { - session->sent_epoch_lock.Unlock(); + session->sent_epoch_lock.lock(); + if (session->last_sent_epoch < last_sent_epoch) { + session->last_sent_epoch = last_sent_epoch; + } + session->sent_epoch_lock.unlock(); session->put(); } } @@ -8092,14 +8099,16 @@ void OSD::handle_op(OpRequestRef& op, OSDMapRef& osdmap) send_map_on_destruct share_map(this, m, osdmap, m->get_map_epoch()); Session *client_session = static_cast(m->get_connection()->get_priv()); + epoch_t last_sent_epoch; if (client_session) { - client_session->sent_epoch_lock.Lock(); + client_session->sent_epoch_lock.lock(); + last_sent_epoch = client_session->last_sent_epoch; + client_session->sent_epoch_lock.unlock(); } share_map.should_send = service.should_share_map( m->get_source(), m->get_connection().get(), m->get_map_epoch(), - osdmap, &client_session->last_sent_epoch); + osdmap, client_session ? &last_sent_epoch : NULL); if (client_session) { - client_session->sent_epoch_lock.Unlock(); client_session->put(); } @@ -8196,15 +8205,17 @@ void OSD::handle_replica_op(OpRequestRef& op, OSDMapRef& osdmap) bool should_share_map = false; Session *peer_session = static_cast(m->get_connection()->get_priv()); + epoch_t last_sent_epoch; if (peer_session) { - peer_session->sent_epoch_lock.Lock(); + peer_session->sent_epoch_lock.lock(); + last_sent_epoch = peer_session->last_sent_epoch; + peer_session->sent_epoch_lock.unlock(); } should_share_map = service.should_share_map( m->get_source(), m->get_connection().get(), m->map_epoch, osdmap, - peer_session ? &peer_session->last_sent_epoch : NULL); + peer_session ? &last_sent_epoch : NULL); if (peer_session) { - peer_session->sent_epoch_lock.Unlock(); peer_session->put(); } @@ -8398,17 +8409,24 @@ void OSD::dequeue_op( if (op->send_map_update) { Message *m = op->get_req(); Session *session = static_cast(m->get_connection()->get_priv()); + epoch_t last_sent_epoch; if (session) { - session->sent_epoch_lock.Lock(); + session->sent_epoch_lock.lock(); + last_sent_epoch = session->last_sent_epoch; + session->sent_epoch_lock.unlock(); } service.share_map( m->get_source(), m->get_connection().get(), op->sent_epoch, osdmap, - session ? &session->last_sent_epoch : NULL); + session ? &last_sent_epoch : NULL); if (session) { - session->sent_epoch_lock.Unlock(); + session->sent_epoch_lock.lock(); + if (session->last_sent_epoch < last_sent_epoch) { + session->last_sent_epoch = last_sent_epoch; + } + session->sent_epoch_lock.unlock(); session->put(); } } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 44a492c83aa..b8f2f4001ad 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -54,6 +54,7 @@ using namespace std; #include "common/sharedptr_registry.hpp" #include "common/PrioritizedQueue.h" #include "messages/MOSDOp.h" +#include "include/Spinlock.h" #define CEPH_OSD_PROTOCOL 10 /* cluster internal */ @@ -1286,17 +1287,16 @@ public: OSDMapRef osdmap; /// Map as of which waiting_for_pg is current map > waiting_for_pg; - Mutex sent_epoch_lock; + Spinlock sent_epoch_lock; epoch_t last_sent_epoch; - Mutex received_map_lock; + Spinlock received_map_lock; epoch_t received_map_epoch; // largest epoch seen in MOSDMap from here Session(CephContext *cct) : RefCountedObject(cct), auid(-1), con(0), - session_dispatch_lock("Session::session_dispatch_lock"), - sent_epoch_lock("Session::sent_epoch_lock"), last_sent_epoch(0), - received_map_lock("Session::received_map_lock"), received_map_epoch(0) + session_dispatch_lock("Session::session_dispatch_lock"), + last_sent_epoch(0), received_map_epoch(0) {} From a8b746411efdf31fc6f60c15b5caacf54af5bc20 Mon Sep 17 00:00:00 2001 From: Xiaowei Chen Date: Sun, 15 Nov 2015 21:56:22 -0500 Subject: [PATCH 2/2] osdservice: state changed to atomic_t to decrease thread context switch. service.is_stopping is in the key IO path, hot call, better use spinlock. Signed-off-by: Xiaowei Chen --- src/osd/OSD.cc | 13 ++++++------- src/osd/OSD.h | 15 ++++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 1c69f4025f6..2a9d8842799 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1021,13 +1021,13 @@ void OSDService::set_epochs(const epoch_t *_boot_epoch, const epoch_t *_up_epoch bool OSDService::prepare_to_stop() { Mutex::Locker l(is_stopping_lock); - if (state != NOT_STOPPING) + if (get_state() != NOT_STOPPING) return false; OSDMapRef osdmap = get_osdmap(); if (osdmap && osdmap->is_up(whoami)) { dout(0) << __func__ << " telling mon we are shutting down" << dendl; - state = PREPARING_TO_STOP; + set_state(PREPARING_TO_STOP); monc->send_mon_message(new MOSDMarkMeDown(monc->get_fsid(), osdmap->get_inst(whoami), osdmap->get_epoch(), @@ -1037,28 +1037,27 @@ bool OSDService::prepare_to_stop() utime_t timeout; timeout.set_from_double(now + cct->_conf->osd_mon_shutdown_timeout); while ((ceph_clock_now(cct) < timeout) && - (state != STOPPING)) { + (get_state() != STOPPING)) { is_stopping_cond.WaitUntil(is_stopping_lock, timeout); } } dout(0) << __func__ << " starting shutdown" << dendl; - state = STOPPING; + set_state(STOPPING); return true; } void OSDService::got_stop_ack() { Mutex::Locker l(is_stopping_lock); - if (state == PREPARING_TO_STOP) { + if (get_state() == PREPARING_TO_STOP) { dout(0) << __func__ << " starting shutdown" << dendl; - state = STOPPING; + set_state(STOPPING); is_stopping_cond.Signal(); } else { dout(10) << __func__ << " ignoring msg" << dendl; } } - MOSDMap *OSDService::build_incremental_map_msg(epoch_t since, epoch_t to, OSDSuperblock& sblock) { diff --git a/src/osd/OSD.h b/src/osd/OSD.h index b8f2f4001ad..65a0d604509 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -970,14 +970,19 @@ public: enum { NOT_STOPPING, PREPARING_TO_STOP, - STOPPING } state; + STOPPING }; + atomic_t state; + int get_state() { + return state.read(); + } + void set_state(int s) { + state.set(s); + } bool is_stopping() { - Mutex::Locker l(is_stopping_lock); - return state == STOPPING; + return get_state() == STOPPING; } bool is_preparing_to_stop() { - Mutex::Locker l(is_stopping_lock); - return state == PREPARING_TO_STOP; + return get_state() == PREPARING_TO_STOP; } bool prepare_to_stop(); void got_stop_ack();