From ce49968938ca3636f48fe543111aa219f36914d8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 5 Jan 2013 20:53:49 -0800 Subject: [PATCH 1/7] os/FileJournal: include limits.h Needed for IOV_MAX. Signed-off-by: Sage Weil --- src/os/FileJournal.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc index 20324f37a3d..50bbb95190e 100644 --- a/src/os/FileJournal.cc +++ b/src/os/FileJournal.cc @@ -23,6 +23,7 @@ #include "include/compat.h" #include +#include #include #include #include From 2a1eb466d3f8e25ec8906b3ca6118a14c4e269d2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 5 Jan 2013 09:29:50 -0800 Subject: [PATCH 2/7] msg/Pipe: fix msg leak in requeue_sent() The sent list owns a reference to each message. Signed-off-by: Sage Weil --- src/msg/Pipe.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index ec9dd24045c..bf8c4566d69 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -1070,14 +1070,17 @@ void Pipe::requeue_sent(uint64_t max_acked) list& rq = out_q[CEPH_MSG_PRIO_HIGHEST]; while (!sent.empty()) { Message *m = sent.back(); + sent.pop_back(); if (m->get_seq() > max_acked) { - sent.pop_back(); ldout(msgr->cct,10) << "requeue_sent " << *m << " for resend seq " << out_seq << " (" << m->get_seq() << ")" << dendl; rq.push_front(m); out_seq--; - } else - sent.clear(); + } else { + ldout(msgr->cct,10) << "requeue_sent " << *m << " for resend seq " << out_seq + << " <= max_acked " << max_acked << ", discarding" << dendl; + m->put(); + } } } From a058f16113efa8f32eb5503d5443aa139754d479 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 5 Jan 2013 10:39:08 -0800 Subject: [PATCH 3/7] msg/Pipe: associate sending msgs to con inside lock Associate a sending message with the connection inside the pipe_lock. This way if a racing thread tries to steal these messages it will be sure to reset the con point *after* we do such that it the con pointer is valid in encode_payload() (and later). This may be part of #3678. Signed-off-by: Sage Weil --- src/msg/Pipe.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index bf8c4566d69..aacc0109496 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -1466,13 +1466,14 @@ void Pipe::writer() sent.push_back(m); m->get(); } - pipe_lock.Unlock(); - - ldout(msgr->cct,20) << "writer encoding " << m->get_seq() << " " << m << " " << *m << dendl; // associate message with Connection (for benefit of encode_payload) m->set_connection(connection_state->get()); + pipe_lock.Unlock(); + + ldout(msgr->cct,20) << "writer encoding " << m->get_seq() << " " << m << " " << *m << dendl; + // encode and copy out of *m m->encode(connection_state->get_features(), !msgr->cct->_conf->ms_nocrc); From 4cfc4903c6fb130b6ac9105baf1f66fbda797f14 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 6 Jan 2013 08:25:40 -0800 Subject: [PATCH 4/7] msg/Pipe: encode message inside pipe_lock This modifies bufferlists in the Message struct, and it is possible for multiple instances of the Pipe to get references on the Message; make sure they don't modify those bufferlists concurrently. Signed-off-by: Sage Weil --- src/msg/Pipe.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index aacc0109496..27d211cf59e 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -1470,13 +1470,13 @@ void Pipe::writer() // associate message with Connection (for benefit of encode_payload) m->set_connection(connection_state->get()); - pipe_lock.Unlock(); - ldout(msgr->cct,20) << "writer encoding " << m->get_seq() << " " << m << " " << *m << dendl; // encode and copy out of *m m->encode(connection_state->get_features(), !msgr->cct->_conf->ms_nocrc); + pipe_lock.Unlock(); + ldout(msgr->cct,20) << "writer sending " << m->get_seq() << " " << m << dendl; int rc = write_message(m); From 62586884afd56f2148205bdadc5a67037a750a9b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 Jan 2013 12:58:39 -0800 Subject: [PATCH 5/7] osdc/Objecter: fix linger_ops iterator invalidation on pool deletion The call to check_linger_pool_dne() may unregister the linger request, invalidating the iterator. To avoid this, increment the iterator at the top of the loop. This mirror the fix in 4bf9078286d58c2cd4e85cb8b31411220a377092 for regular non-linger ops. Fixes: #3734 Signed-off-by: Sage Weil Reviewed-by: Samuel Just Reviewed-by: Greg Farnum --- src/osdc/Objecter.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index 04a74b87b66..339499fd96a 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -399,10 +399,10 @@ void Objecter::scan_requests(bool skipped_map, list& need_resend_linger) { // check for changed linger mappings (_before_ regular ops) - for (map::iterator p = linger_ops.begin(); - p != linger_ops.end(); - p++) { - LingerOp *op = p->second; + map::iterator lp = linger_ops.begin(); + while (lp != linger_ops.end()) { + LingerOp *op = lp->second; + ++lp; // check_linger_pool_dne() may touch linger_ops; prevent iterator invalidation ldout(cct, 10) << " checking linger op " << op->linger_id << dendl; int r = recalc_linger_op_target(op); switch (r) { From 40706afc66f485b2bd40b2b4b1cd5377244f8758 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 6 Jan 2013 08:33:01 -0800 Subject: [PATCH 6/7] msgr: update Message envelope in encode, not write_message Fill out the Message header, footer, and calculate CRCs during encoding, not write_message(). This removes most modifications from Pipe::write_message(). Signed-off-by: Sage Weil --- src/msg/Message.cc | 14 +++++++++++--- src/msg/Pipe.cc | 11 ++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 8d4a2f7e4d6..cfab666ddf6 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -164,6 +164,15 @@ void Message::encode(uint64_t features, bool datacrc) header.compat_version = header.version; } calc_front_crc(); + + // update envelope + header.front_len = get_payload().length(); + header.middle_len = get_middle().length(); + header.data_len = get_data().length(); + calc_header_crc(); + + footer.flags = CEPH_MSG_FOOTER_COMPLETE; + if (datacrc) { calc_data_crc(); @@ -196,10 +205,9 @@ void Message::encode(uint64_t features, bool datacrc) } } #endif - - } - else + } else { footer.flags = (unsigned)footer.flags | CEPH_MSG_FOOTER_NOCRC; + } } void Message::dump(Formatter *f) const diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index 27d211cf59e..563bb2bb839 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -1860,17 +1860,10 @@ int Pipe::write_keepalive() int Pipe::write_message(Message *m) { - ceph_msg_header& header = m->get_header(); - ceph_msg_footer& footer = m->get_footer(); + const ceph_msg_header& header = m->get_header(); + const ceph_msg_footer& footer = m->get_footer(); int ret; - // get envelope, buffers - header.front_len = m->get_payload().length(); - header.middle_len = m->get_middle().length(); - header.data_len = m->get_data().length(); - footer.flags = CEPH_MSG_FOOTER_COMPLETE; - m->calc_header_crc(); - // Now that we have all the crcs calculated, handle the digital signature for the message, if the // pipe has session security set up. Some session security options do not actually calculate and // check the signature, but they should handle the calls to sign_message and check_signature. PLR From d16ad9263d7b1d3c096f56c56e9631fae8509651 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 6 Jan 2013 08:38:27 -0800 Subject: [PATCH 7/7] msg/Pipe: prepare Message data for wire under pipe_lock We cannot trust the Message bufferlists or other structures to be stable without pipe_lock, as another Pipe may claim and modify the sent list items while we are writing to the socket. Related to #3678. Signed-off-by: Sage Weil --- src/msg/Pipe.cc | 51 ++++++++++++++++++++++++++----------------------- src/msg/Pipe.h | 2 +- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/msg/Pipe.cc b/src/msg/Pipe.cc index 563bb2bb839..0add14b441e 100644 --- a/src/msg/Pipe.cc +++ b/src/msg/Pipe.cc @@ -1475,10 +1475,35 @@ void Pipe::writer() // encode and copy out of *m m->encode(connection_state->get_features(), !msgr->cct->_conf->ms_nocrc); + // prepare everything + ceph_msg_header& header = m->get_header(); + ceph_msg_footer& footer = m->get_footer(); + + // Now that we have all the crcs calculated, handle the + // digital signature for the message, if the pipe has session + // security set up. Some session security options do not + // actually calculate and check the signature, but they should + // handle the calls to sign_message and check_signature. PLR + if (session_security == NULL) { + ldout(msgr->cct, 20) << "writer no session security" << dendl; + } else { + if (session_security->sign_message(m)) { + ldout(msgr->cct, 20) << "writer failed to sign seq # " << header.seq + << "): sig = " << footer.sig << dendl; + } else { + ldout(msgr->cct, 20) << "writer signed seq # " << header.seq + << "): sig = " << footer.sig << dendl; + } + } + + bufferlist blist = m->get_payload(); + blist.append(m->get_middle()); + blist.append(m->get_data()); + pipe_lock.Unlock(); ldout(msgr->cct,20) << "writer sending " << m->get_seq() << " " << m << dendl; - int rc = write_message(m); + int rc = write_message(header, footer, blist); pipe_lock.Lock(); if (rc < 0) { @@ -1858,32 +1883,10 @@ int Pipe::write_keepalive() } -int Pipe::write_message(Message *m) +int Pipe::write_message(ceph_msg_header& header, ceph_msg_footer& footer, bufferlist& blist) { - const ceph_msg_header& header = m->get_header(); - const ceph_msg_footer& footer = m->get_footer(); int ret; - // Now that we have all the crcs calculated, handle the digital signature for the message, if the - // pipe has session security set up. Some session security options do not actually calculate and - // check the signature, but they should handle the calls to sign_message and check_signature. PLR - - if (session_security == NULL) { - ldout(msgr->cct, 20) << "Pipe: write_message: session security NULL for this pipe." << dendl; - } else { - if (session_security->sign_message(m)) { - ldout(msgr->cct, 20) << "Failed to put signature in client message (seq # " << header.seq << "): sig = " << footer.sig << dendl; - } else { - ldout(msgr->cct, 20) << "Put signature in client message (seq # " << header.seq << "): sig = " << footer.sig << dendl; - } - } - - bufferlist blist = m->get_payload(); - blist.append(m->get_middle()); - blist.append(m->get_data()); - - ldout(msgr->cct,20) << "write_message " << m << dendl; - // set up msghdr and iovecs struct msghdr msg; memset(&msg, 0, sizeof(msg)); diff --git a/src/msg/Pipe.h b/src/msg/Pipe.h index 8f3ba641cf6..3eeae705bd7 100644 --- a/src/msg/Pipe.h +++ b/src/msg/Pipe.h @@ -178,7 +178,7 @@ class DispatchQueue; int randomize_out_seq(); int read_message(Message **pm); - int write_message(Message *m); + int write_message(ceph_msg_header& h, ceph_msg_footer& f, bufferlist& body); /** * Write the given data (of length len) to the Pipe's socket. This function * will loop until all passed data has been written out.