From 8a5070fbc54291ad8fa56faae9e486759e13685e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Oct 2008 10:56:45 -0700 Subject: [PATCH 1/3] msgr: don't host msg bufferlists in write_message payload and data need to be preserved in case the connection drops and the messages need to be resent. --- src/msg/SimpleMessenger.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index f8b9d776592..c494add0a04 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -1878,8 +1878,7 @@ int Rank::Pipe::write_message(Message *m, ceph_msg_header *header, f.front_crc = payload.crc32c(0); f.data_crc = data.crc32c(0); - bufferlist blist; - blist.claim(payload); + bufferlist blist = payload; blist.append(data); dout(20) << "write_message " << m << " to " << header->dst << dendl; From e6dd588fb7e480ff02c32dd676cdabe3bbf03328 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Oct 2008 12:08:32 -0700 Subject: [PATCH 2/3] mds: don't issue leases on dentries in stray dir Also, assert in remove_dentry() that the dentry has no more leases. --- src/mds/CDir.cc | 3 +++ src/mds/Locker.cc | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 5a48a33f22e..a6b5163d9b2 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -316,6 +316,9 @@ void CDir::remove_dentry(CDentry *dn) { dout(12) << "remove_dentry " << *dn << dendl; + // there should be no client leases at this point! + assert(dn->client_lease_map.empty()); + if (dn->inode) { // detach inode and dentry unlink_inode_work(dn); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index ccb033b0aef..351e0c95677 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1286,7 +1286,8 @@ int Locker::issue_client_lease(CDentry *dn, int client, // if the client is holding EXCL|RDCACHE caps. int mask = 0; CInode *diri = dn->get_dir()->get_inode(); - if ((diri->is_base() || // base inode's don't get version updated, so ICONTENT is useless. + if (!diri->is_stray() && // do not issue dn leases in stray dir! + (diri->is_base() || // base inode's don't get version updated, so ICONTENT is useless. (!diri->dirlock.can_lease() && (diri->get_client_cap_pending(client) & (CEPH_CAP_EXCL|CEPH_CAP_RDCACHE)) == 0)) && dn->lock.can_lease()) From 6f8d6bb66e35e6ff6d29b382af82725ea6a54de5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Oct 2008 12:09:05 -0700 Subject: [PATCH 3/3] msgr: clean up crc handling --- src/kernel/messenger.c | 16 ++++----- src/msg/Message.cc | 3 -- src/msg/Message.h | 13 ++++++-- src/msg/SimpleMessenger.cc | 68 ++++++++++++++++++++------------------ src/msg/SimpleMessenger.h | 3 +- 5 files changed, 54 insertions(+), 49 deletions(-) diff --git a/src/kernel/messenger.c b/src/kernel/messenger.c index d5792f4bba8..8ccbce66833 100644 --- a/src/kernel/messenger.c +++ b/src/kernel/messenger.c @@ -1027,7 +1027,7 @@ static int read_message_partial(struct ceph_connection *con) struct ceph_msg *m = con->in_msg; void *p; int ret; - int want, left; + int to, want, left; unsigned front_len, data_len, data_off; dout(20, "read_message_partial con %p msg %p\n", con, m); @@ -1132,8 +1132,9 @@ static int read_message_partial(struct ceph_connection *con) no_data: /* footer */ - while (con->in_base_pos < sizeof(m->hdr) + sizeof(m->footer)) { - left = sizeof(m->hdr) + sizeof(m->footer) - con->in_base_pos; + to = sizeof(m->hdr) + sizeof(m->footer); + while (con->in_base_pos < to) { + left = to - con->in_base_pos; ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer + (con->in_base_pos - sizeof(m->hdr)), left); @@ -1141,20 +1142,19 @@ no_data: return ret; con->in_base_pos += ret; } - dout(20, "read_message_partial got msg %p\n", m); /* crc ok? */ - if (con->in_front_crc != con->in_msg->footer.front_crc) { + if (con->in_front_crc != m->footer.front_crc) { derr(0, "read_message_partial %p front crc %u != expected %u\n", con->in_msg, - con->in_front_crc, con->in_msg->footer.front_crc); + con->in_front_crc, m->footer.front_crc); return -EIO; } - if (con->in_data_crc != con->in_msg->footer.data_crc) { + if (con->in_data_crc != m->footer.data_crc) { derr(0, "read_message_partial %p data crc %u != expected %u\n", con->in_msg, - con->in_data_crc, con->in_msg->footer.data_crc); + con->in_data_crc, m->footer.data_crc); return -EIO; } diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 9d769963988..e1a0fa22257 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -418,9 +418,6 @@ decode_message(ceph_msg_header& header, ceph_msg_footer& footer, m->decode_payload(); - m->front_crc = front_crc; - m->data_crc = data_crc; - // done! return m; } diff --git a/src/msg/Message.h b/src/msg/Message.h index f3b00df2712..4717c62165d 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -126,8 +126,6 @@ protected: friend class Messenger; public: - __u32 front_crc, data_crc; - Message() { }; Message(int t) { header.type = t; @@ -154,7 +152,16 @@ public: void set_recv_stamp(utime_t t) { recv_stamp = t; } utime_t get_recv_stamp() { return recv_stamp; } - // HEADERELOPE ---- + void calc_header_crc() { + header.crc = crc32c_le(0, (unsigned char*)&header, + sizeof(header) - sizeof(header.crc)); + } + void calc_front_crc() { + footer.front_crc = payload.crc32c(0); + } + void calc_data_crc() { + footer.data_crc = data.crc32c(0); + } // type int get_type() { return header.type; } diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index c494add0a04..ae5ad527472 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -593,7 +593,7 @@ void Rank::EntityMessenger::dispatch_entry() << " <== " << m->get_source_inst() << " ==== " << *m << " ==== " << m->get_payload().length() << "+" << m->get_data().length() - << " (" << m->front_crc << " " << m->data_crc << ")" + << " (" << m->get_footer().front_crc << " " << m->get_footer().data_crc << ")" << " " << m << dendl; dispatch(m); @@ -665,12 +665,15 @@ int Rank::EntityMessenger::send_message(Message *m, entity_inst_t dest) m->set_source_inst(_myinst); m->set_orig_source_inst(_myinst); m->set_dest_inst(dest); + m->calc_data_crc(); dout(1) << m->get_source() << " --> " << dest.name << " " << dest.addr << " -- " << *m - << " -- " << m - << dendl; + << " -- ?+" << m->get_data().length() + << " (? " << m->get_footer().data_crc << ")" + << " " << m + << dendl; rank.submit_message(m, dest.addr); @@ -682,11 +685,14 @@ int Rank::EntityMessenger::forward_message(Message *m, entity_inst_t dest) // set envelope m->set_source_inst(_myinst); m->set_dest_inst(dest); + m->calc_data_crc(); dout(1) << m->get_source() << " **> " << dest.name << " " << dest.addr << " -- " << *m - << " -- " << m + << " -- ?+" << m->get_data().length() + << " (? " << m->get_footer().data_crc << ")" + << " " << m << dendl; rank.submit_message(m, dest.addr); @@ -702,11 +708,14 @@ int Rank::EntityMessenger::lazy_send_message(Message *m, entity_inst_t dest) m->set_source_inst(_myinst); m->set_orig_source_inst(_myinst); m->set_dest_inst(dest); + m->calc_data_crc(); dout(1) << "lazy " << m->get_source() << " --> " << dest.name << " " << dest.addr << " -- " << *m - << " -- " << m + << " -- ?+" << m->get_data().length() + << " (? " << m->get_footer().data_crc << ")" + << " " << m << dendl; rank.submit_message(m, dest.addr, true); @@ -1638,21 +1647,18 @@ void Rank::Pipe::writer() // encode and copy out of *m if (m->empty_payload()) m->encode_payload(); - bufferlist payload, data; - payload.claim(m->get_payload()); - data.claim(m->get_data()); - ceph_msg_header hdr = m->get_header(); + m->calc_front_crc(); lock.Lock(); sent.push_back(m); // move to sent list lock.Unlock(); dout(20) << "writer sending " << m->get_seq() << " " << m << dendl; - int rc = write_message(m, &hdr, payload, data); + int rc = write_message(m); lock.Lock(); if (rc < 0) { - derr(1) << "writer error sending " << m << " to " << hdr.dst << ", " + derr(1) << "writer error sending " << m << " to " << m->get_header().dst << ", " << errno << ": " << strerror(errno) << dendl; fault(); } @@ -1710,7 +1716,7 @@ Message *Rank::Pipe::read_message() // verify header crc __u32 header_crc = crc32c_le(0, (unsigned char *)&header, sizeof(header) - sizeof(header.crc)); if (header_crc != header.crc) { - dout(0) << "reader got bad header crc " << header_crc << " != " << header_crc << dendl; + dout(0) << "reader got bad header crc " << header_crc << " != " << header.crc << dendl; return 0; } @@ -1863,25 +1869,21 @@ int Rank::Pipe::write_ack(unsigned seq) } -int Rank::Pipe::write_message(Message *m, ceph_msg_header *header, - bufferlist &payload, bufferlist &data) +int Rank::Pipe::write_message(Message *m) { - struct ceph_msg_footer f; - memset(&f, 0, sizeof(f)); + ceph_msg_header& header = m->get_header(); + ceph_msg_footer& footer = m->get_footer(); // get envelope, buffers - header->front_len = payload.length(); - header->data_len = data.length(); + header.front_len = m->get_payload().length(); + header.data_len = m->get_data().length(); + footer.aborted = 0; + m->calc_header_crc(); - // calculate header, footer crc - header->crc = crc32c_le(0, (unsigned char*)header, sizeof(*header) - sizeof(header->crc)); - f.front_crc = payload.crc32c(0); - f.data_crc = data.crc32c(0); - - bufferlist blist = payload; - blist.append(data); + bufferlist blist = m->get_payload(); + blist.append(m->get_data()); - dout(20) << "write_message " << m << " to " << header->dst << dendl; + dout(20) << "write_message " << m << " to " << header.dst << dendl; // set up msghdr and iovecs struct msghdr msg; @@ -1898,9 +1900,9 @@ int Rank::Pipe::write_message(Message *m, ceph_msg_header *header, msg.msg_iovlen++; // send envelope - msgvec[msg.msg_iovlen].iov_base = (char*)header; - msgvec[msg.msg_iovlen].iov_len = sizeof(*header); - msglen += sizeof(*header); + msgvec[msg.msg_iovlen].iov_base = (char*)&header; + msgvec[msg.msg_iovlen].iov_len = sizeof(header); + msglen += sizeof(header); msg.msg_iovlen++; // payload (front+data) @@ -1948,10 +1950,10 @@ int Rank::Pipe::write_message(Message *m, ceph_msg_header *header, } assert(left == 0); - // send data footer - msgvec[msg.msg_iovlen].iov_base = (void*)&f; - msgvec[msg.msg_iovlen].iov_len = sizeof(f); - msglen += sizeof(f); + // send footer + msgvec[msg.msg_iovlen].iov_base = (void*)&footer; + msgvec[msg.msg_iovlen].iov_len = sizeof(footer); + msglen += sizeof(footer); msg.msg_iovlen++; // send diff --git a/src/msg/SimpleMessenger.h b/src/msg/SimpleMessenger.h index 2784dd2e520..44c3863d7b4 100644 --- a/src/msg/SimpleMessenger.h +++ b/src/msg/SimpleMessenger.h @@ -133,8 +133,7 @@ private: void writer(); Message *read_message(); - int write_message(Message *m, ceph_msg_header *env, - bufferlist &payload, bufferlist &data); + int write_message(Message *m); int do_sendmsg(int sd, struct msghdr *msg, int len); int write_ack(unsigned s);