mds: update segment references during journal rewrite

... to avoid leaving log events that reference log
segments by offsets which no longer exist.

Signed-off-by: John Spray <john.spray@redhat.com>
(cherry picked from commit 386f2d7c82)
Reviewed-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
John Spray 2014-09-11 14:07:59 +01:00 committed by Greg Farnum
parent 9f4c687288
commit c3c6468cad
6 changed files with 104 additions and 4 deletions

View File

@ -33,9 +33,11 @@ class CDentry;
class MDS;
struct MDSlaveUpdate;
typedef uint64_t log_segment_seq_t;
class LogSegment {
public:
const uint64_t seq;
const log_segment_seq_t seq;
uint64_t offset, end;
int num_events;

View File

@ -850,6 +850,13 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa
// write them to the new journal.
int r = 0;
// In old format journals before event_seq was introduced, the serialized
// offset of a SubtreeMap message in the log is used as the unique ID for
// a log segment. Because we change serialization, this will end up changing
// for us, so we have to explicitly update the fields that point back to that
// log segment.
std::map<log_segment_seq_t, log_segment_seq_t> segment_pos_rewrite;
// The logic in here borrowed from replay_thread expects mds_lock to be held,
// e.g. between checking readable and doing wait_for_readable so that journaler
// state doesn't change in between.
@ -879,11 +886,63 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa
// Read one serialized LogEvent
assert(old_journal->is_readable());
bufferlist bl;
uint64_t le_pos = old_journal->get_read_pos();
bool r = old_journal->try_read_entry(bl);
if (!r && old_journal->get_error())
continue;
assert(r);
// Update segment_pos_rewrite
LogEvent *le = LogEvent::decode(bl);
if (le) {
bool modified = false;
if (le->get_type() == EVENT_SUBTREEMAP ||
le->get_type() == EVENT_RESETJOURNAL) {
ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le);
if (sle == NULL || sle->event_seq == 0) {
// A non-explicit event seq: the effective sequence number
// of this segment is its position in the old journal and
// the new effective sequence number will be its position
// in the new journal.
segment_pos_rewrite[le_pos] = new_journal->get_write_pos();
dout(20) << __func__ << " discovered segment seq mapping "
<< le_pos << " -> " << new_journal->get_write_pos() << dendl;
}
} else {
event_seq++;
}
// Rewrite segment references if necessary
EMetaBlob *blob = le->get_metablob();
if (blob) {
modified = blob->rewrite_truncate_finish(mds, segment_pos_rewrite);
}
// Zero-out expire_pos in subtreemap because offsets have changed
// (expire_pos is just an optimization so it's safe to eliminate it)
if (le->get_type() == EVENT_SUBTREEMAP) {
dout(20) << __func__ << " zeroing expire_pos in subtreemap event at " << le_pos << dendl;
ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le);
assert(sle != NULL);
sle->expire_pos = 0;
modified = true;
}
if (modified) {
bl.clear();
le->encode_with_header(bl);
}
delete le;
} else {
// Failure from LogEvent::decode, our job is to change the journal wrapper,
// not validate the contents, so pass it through.
dout(1) << __func__ << " transcribing un-decodable LogEvent at old position "
<< old_journal->get_read_pos() << ", new position " << new_journal->get_write_pos()
<< dendl;
}
// Write (buffered, synchronous) one serialized LogEvent
events_transcribed += 1;
new_journal->append_entry(bl);

View File

@ -220,7 +220,7 @@ public:
return segments.rbegin()->second;
}
LogSegment *get_segment(uint64_t seq) {
LogSegment *get_segment(log_segment_seq_t seq) {
if (segments.count(seq))
return segments[seq];
return NULL;

View File

@ -345,7 +345,7 @@ private:
void handle_signal(int signum);
// who am i etc
int get_nodeid() { return whoami; }
int get_nodeid() const { return whoami; }
// Forwards to get_metadata_pool() on the current mdsmap.
uint64_t get_metadata_pool() { return mdsmap->get_metadata_pool(); }
// Returns the mdsmap pointer held by this object (no copy is made).
MDSMap *get_mds_map() { return mdsmap; }

View File

@ -20,6 +20,7 @@
#include "../CInode.h"
#include "../CDir.h"
#include "../CDentry.h"
#include "../LogSegment.h"
#include "include/triple.h"
#include "include/interval_set.h"
@ -305,7 +306,7 @@ private:
// inodes i've truncated
list<inodeno_t> truncate_start; // start truncate
map<inodeno_t,uint64_t> truncate_finish; // finished truncate (started in segment blah)
map<inodeno_t, log_segment_seq_t> truncate_finish; // finished truncate (started in segment blah)
public:
vector<inodeno_t> destroyed_inodes;
@ -374,6 +375,8 @@ private:
// Record a finished truncate for inode `ino`: stores `segoff` as that inode's
// entry in truncate_finish, overwriting any previous entry for the same inode.
// NOTE(review): truncate_finish now maps to log_segment_seq_t (a typedef of
// uint64_t), so `segoff` is a segment sequence value despite its name.
void add_truncate_finish(inodeno_t ino, uint64_t segoff) {
truncate_finish[ino] = segoff;
}
// Remap the segment references stored in truncate_finish using old_to_new
// (old segment seq -> new segment seq). Returns true if any entry had a
// mapping applied. `mds` is only used for logging.
// log_segment_seq_t is a typedef of uint64_t, so this map type is the same
// type as std::map<log_segment_seq_t, log_segment_seq_t>.
bool rewrite_truncate_finish(MDS const *mds, std::map<uint64_t, uint64_t> const &old_to_new);
void add_destroyed_inode(inodeno_t ino) {
destroyed_inodes.push_back(ino);

View File

@ -3022,3 +3022,39 @@ void ENoOp::replay(MDS *mds)
{
dout(4) << "ENoOp::replay, " << pad_size << " bytes skipped in journal" << dendl;
}
/**
 * If re-formatting an old journal that used absolute log position
 * references as segment sequence numbers, use this function to update
 * the segment references stored in truncate_finish.
 *
 * Every truncate_finish entry whose segment value appears as a key in
 * old_to_new has that value replaced by the mapped one; entries without a
 * mapping are left untouched.
 *
 * @param mds
 *  MDS instance, just used for logging
 * @param old_to_new
 *  Map of old journal segment sequence numbers to new journal segment
 *  sequence numbers
 *
 * @return
 *  True if a mapping was applied to at least one entry (this is reported
 *  even when the mapped value equals the old one).
 */
bool EMetaBlob::rewrite_truncate_finish(MDS const *mds,
    std::map<log_segment_seq_t, log_segment_seq_t> const &old_to_new)
{
  typedef std::map<log_segment_seq_t, log_segment_seq_t> seq_map_t;

  bool modified = false;
  for (std::map<inodeno_t, log_segment_seq_t>::iterator i = truncate_finish.begin();
       i != truncate_finish.end(); ++i) {
    // One lookup per entry; the result is reused for both the log line and
    // the update.
    seq_map_t::const_iterator mapping = old_to_new.find(i->second);
    if (mapping != old_to_new.end()) {
      dout(20) << __func__ << " applying segment seq mapping "
        << i->second << " -> " << mapping->second << dendl;
      // Only the mapped value changes (never the key), so updating in place
      // while iterating does not invalidate the iterator.
      i->second = mapping->second;
      modified = true;
    } else {
      dout(20) << __func__ << " no segment seq mapping found for "
        << i->second << dendl;
    }
  }
  return modified;
}