mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
mds: update segment references during journal rewrite
... to avoid leaving log events that reference log
segments by offsets which no longer exist.
Signed-off-by: John Spray <john.spray@redhat.com>
(cherry picked from commit 386f2d7c82
)
Reviewed-by: Greg Farnum <greg@inktank.com>
This commit is contained in:
parent
9f4c687288
commit
c3c6468cad
@ -33,9 +33,11 @@ class CDentry;
|
||||
class MDS;
|
||||
struct MDSlaveUpdate;
|
||||
|
||||
typedef uint64_t log_segment_seq_t;
|
||||
|
||||
class LogSegment {
|
||||
public:
|
||||
const uint64_t seq;
|
||||
const log_segment_seq_t seq;
|
||||
uint64_t offset, end;
|
||||
int num_events;
|
||||
|
||||
|
@ -850,6 +850,13 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa
|
||||
// write them to the new journal.
|
||||
int r = 0;
|
||||
|
||||
// In old format journals before event_seq was introduced, the serialized
|
||||
// offset of a SubtreeMap message in the log is used as the unique ID for
|
||||
// a log segment. Because we change serialization, this will end up changing
|
||||
// for us, so we have to explicitly update the fields that point back to that
|
||||
// log segment.
|
||||
std::map<log_segment_seq_t, log_segment_seq_t> segment_pos_rewrite;
|
||||
|
||||
// The logic in here borrowed from replay_thread expects mds_lock to be held,
|
||||
// e.g. between checking readable and doing wait_for_readable so that journaler
|
||||
// state doesn't change in between.
|
||||
@ -879,11 +886,63 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa
|
||||
// Read one serialized LogEvent
|
||||
assert(old_journal->is_readable());
|
||||
bufferlist bl;
|
||||
uint64_t le_pos = old_journal->get_read_pos();
|
||||
bool r = old_journal->try_read_entry(bl);
|
||||
if (!r && old_journal->get_error())
|
||||
continue;
|
||||
assert(r);
|
||||
|
||||
// Update segment_pos_rewrite
|
||||
LogEvent *le = LogEvent::decode(bl);
|
||||
if (le) {
|
||||
bool modified = false;
|
||||
|
||||
if (le->get_type() == EVENT_SUBTREEMAP ||
|
||||
le->get_type() == EVENT_RESETJOURNAL) {
|
||||
ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le);
|
||||
if (sle == NULL || sle->event_seq == 0) {
|
||||
// A non-explicit event seq: the effective sequence number
|
||||
// of this segment is its position in the old journal and
|
||||
// the new effective sequence number will be its position
|
||||
// in the new journal.
|
||||
segment_pos_rewrite[le_pos] = new_journal->get_write_pos();
|
||||
dout(20) << __func__ << " discovered segment seq mapping "
|
||||
<< le_pos << " -> " << new_journal->get_write_pos() << dendl;
|
||||
}
|
||||
} else {
|
||||
event_seq++;
|
||||
}
|
||||
|
||||
// Rewrite segment references if necessary
|
||||
EMetaBlob *blob = le->get_metablob();
|
||||
if (blob) {
|
||||
modified = blob->rewrite_truncate_finish(mds, segment_pos_rewrite);
|
||||
}
|
||||
|
||||
// Zero-out expire_pos in subtreemap because offsets have changed
|
||||
// (expire_pos is just an optimization so it's safe to eliminate it)
|
||||
if (le->get_type() == EVENT_SUBTREEMAP) {
|
||||
dout(20) << __func__ << " zeroing expire_pos in subtreemap event at " << le_pos << dendl;
|
||||
ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le);
|
||||
assert(sle != NULL);
|
||||
sle->expire_pos = 0;
|
||||
modified = true;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
bl.clear();
|
||||
le->encode_with_header(bl);
|
||||
}
|
||||
|
||||
delete le;
|
||||
} else {
|
||||
// Failure from LogEvent::decode, our job is to change the journal wrapper,
|
||||
// not validate the contents, so pass it through.
|
||||
dout(1) << __func__ << " transcribing un-decodable LogEvent at old position "
|
||||
<< old_journal->get_read_pos() << ", new position " << new_journal->get_write_pos()
|
||||
<< dendl;
|
||||
}
|
||||
|
||||
// Write (buffered, synchronous) one serialized LogEvent
|
||||
events_transcribed += 1;
|
||||
new_journal->append_entry(bl);
|
||||
|
@ -220,7 +220,7 @@ public:
|
||||
return segments.rbegin()->second;
|
||||
}
|
||||
|
||||
LogSegment *get_segment(uint64_t seq) {
|
||||
LogSegment *get_segment(log_segment_seq_t seq) {
|
||||
if (segments.count(seq))
|
||||
return segments[seq];
|
||||
return NULL;
|
||||
|
@ -345,7 +345,7 @@ private:
|
||||
void handle_signal(int signum);
|
||||
|
||||
// who am i etc
|
||||
int get_nodeid() { return whoami; }
|
||||
int get_nodeid() const { return whoami; }
|
||||
uint64_t get_metadata_pool() { return mdsmap->get_metadata_pool(); }
|
||||
MDSMap *get_mds_map() { return mdsmap; }
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "../CInode.h"
|
||||
#include "../CDir.h"
|
||||
#include "../CDentry.h"
|
||||
#include "../LogSegment.h"
|
||||
|
||||
#include "include/triple.h"
|
||||
#include "include/interval_set.h"
|
||||
@ -305,7 +306,7 @@ private:
|
||||
|
||||
// inodes i've truncated
|
||||
list<inodeno_t> truncate_start; // start truncate
|
||||
map<inodeno_t,uint64_t> truncate_finish; // finished truncate (started in segment blah)
|
||||
map<inodeno_t, log_segment_seq_t> truncate_finish; // finished truncate (started in segment blah)
|
||||
|
||||
public:
|
||||
vector<inodeno_t> destroyed_inodes;
|
||||
@ -374,6 +375,8 @@ private:
|
||||
void add_truncate_finish(inodeno_t ino, uint64_t segoff) {
|
||||
truncate_finish[ino] = segoff;
|
||||
}
|
||||
|
||||
bool rewrite_truncate_finish(MDS const *mds, std::map<uint64_t, uint64_t> const &old_to_new);
|
||||
|
||||
void add_destroyed_inode(inodeno_t ino) {
|
||||
destroyed_inodes.push_back(ino);
|
||||
|
@ -3022,3 +3022,39 @@ void ENoOp::replay(MDS *mds)
|
||||
{
|
||||
dout(4) << "ENoOp::replay, " << pad_size << " bytes skipped in journal" << dendl;
|
||||
}
|
||||
|
||||
/**
|
||||
* If re-formatting an old journal that used absolute log position
|
||||
* references as segment sequence numbers, use this function to update
|
||||
* it.
|
||||
*
|
||||
* @param mds
|
||||
* MDS instance, just used for logging
|
||||
* @param old_to_new
|
||||
* Map of old journal segment segment sequence numbers to new journal segment sequence numbers
|
||||
*
|
||||
* @return
|
||||
* True if the event was modified.
|
||||
*/
|
||||
bool EMetaBlob::rewrite_truncate_finish(MDS const *mds,
|
||||
std::map<log_segment_seq_t, log_segment_seq_t> const &old_to_new)
|
||||
{
|
||||
bool modified = false;
|
||||
map<inodeno_t, log_segment_seq_t> new_trunc_finish;
|
||||
for (std::map<inodeno_t, log_segment_seq_t>::iterator i = truncate_finish.begin();
|
||||
i != truncate_finish.end(); ++i) {
|
||||
if (old_to_new.count(i->second)) {
|
||||
dout(20) << __func__ << " applying segment seq mapping "
|
||||
<< i->second << " -> " << old_to_new.find(i->second)->second << dendl;
|
||||
new_trunc_finish[i->first] = old_to_new.find(i->second)->second;
|
||||
modified = true;
|
||||
} else {
|
||||
dout(20) << __func__ << " no segment seq mapping found for "
|
||||
<< i->second << dendl;
|
||||
new_trunc_finish[i->first] = i->second;
|
||||
}
|
||||
}
|
||||
truncate_finish = new_trunc_finish;
|
||||
|
||||
return modified;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user