Merge branch 'wip-osd-fsid'

This commit is contained in:
Sage Weil 2011-12-19 13:24:46 -08:00
commit 98a4809ad2
13 changed files with 221 additions and 69 deletions

View File

@ -69,6 +69,21 @@ Options
resize the journal or need to otherwise destroy it: this guarantees
you won't lose data.
.. option:: --get-cluster-fsid
Print the cluster fsid (uuid) and exit.
.. option:: --get-osd-fsid
Print the OSD's fsid and exit. The OSD's uuid is generated at
--mkfs time and is thus unique to a particular instantiation of
this OSD.
.. option:: --get-journal-fsid
Print the journal's fsid (uuid) and exit. The journal fsid is set to
match the OSD fsid at --mkfs time.
.. option:: -c ceph.conf, --conf=ceph.conf
Use *ceph.conf* configuration file instead of the default

View File

@ -65,6 +65,9 @@ int main(int argc, const char **argv)
bool mkkey = false;
bool flushjournal = false;
bool convertfilestore = false;
bool get_journal_fsid = false;
bool get_osd_fsid = false;
bool get_cluster_fsid = false;
std::string dump_pg_log;
std::string val;
@ -86,6 +89,12 @@ int main(int argc, const char **argv)
convertfilestore = true;
} else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) {
dump_pg_log = val;
} else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) {
get_cluster_fsid = true;
} else if (ceph_argparse_flag(args, i, "--get-osd-fsid", (char*)NULL)) {
get_osd_fsid = true;
} else if (ceph_argparse_flag(args, i, "--get-journal-fsid", (char*)NULL)) {
get_journal_fsid = true;
} else {
++i;
}
@ -213,9 +222,9 @@ int main(int argc, const char **argv)
}
string magic;
uuid_d fsid;
uuid_d cluster_fsid, osd_fsid;
int w;
int r = OSD::peek_meta(g_conf->osd_data, magic, fsid, w);
int r = OSD::peek_meta(g_conf->osd_data, magic, cluster_fsid, osd_fsid, w);
if (r < 0) {
derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on "
<< g_conf->osd_data << ": " << cpp_strerror(-r)
@ -236,6 +245,22 @@ int main(int argc, const char **argv)
exit(1);
}
if (get_cluster_fsid) {
cout << cluster_fsid << std::endl;
exit(0);
}
if (get_osd_fsid) {
cout << osd_fsid << std::endl;
exit(0);
}
if (get_journal_fsid) {
uuid_d fsid;
int r = OSD::peek_journal_fsid(g_conf->osd_journal, fsid);
if (r == 0)
cout << fsid << std::endl;
exit(r);
}
pick_addresses(g_ceph_context);
if (g_conf->public_addr.is_blank_ip() && !g_conf->cluster_addr.is_blank_ip()) {
@ -262,7 +287,7 @@ int main(int argc, const char **argv)
global_print_banner();
cout << "starting osd." << whoami
<< " at " << client_messenger->get_ms_addr()
<< " at " << client_messenger->get_ms_addr()
<< " osd_data " << g_conf->osd_data
<< " " << ((g_conf->osd_journal.empty()) ?
"(no journal)" : g_conf->osd_journal)

View File

@ -572,8 +572,9 @@ bool OSDMonitor::preprocess_boot(MOSDBoot *m)
goto ignore;
}
if (m->sb.fsid != mon->monmap->fsid) {
dout(0) << "preprocess_boot on fsid " << m->sb.fsid << " != " << mon->monmap->fsid << dendl;
if (m->sb.cluster_fsid != mon->monmap->fsid) {
dout(0) << "preprocess_boot on fsid " << m->sb.cluster_fsid
<< " != " << mon->monmap->fsid << dendl;
goto ignore;
}

View File

@ -325,14 +325,14 @@ int FileJournal::create()
int64_t needed_space;
int ret;
buffer::ptr bp;
dout(2) << "create " << fn << dendl;
dout(2) << "create " << fn << " fsid " << fsid << dendl;
ret = _open(true, true);
if (ret < 0)
goto done;
// write empty header
memset(&header, 0, sizeof(header));
header = header_t();
header.clear();
header.fsid = fsid;
header.max_size = max_size;
@ -394,9 +394,21 @@ done:
return ret;
}
/**
 * Read the fsid stored in the on-disk journal header.
 *
 * Opens the journal via _open(false, false), reads the header, and
 * copies header.fsid out.
 *
 * @param fsid  [out] set to the header's fsid on success; left untouched
 *              on failure
 * @return 0 on success, or the negative error returned by _open() or
 *         read_header()
 */
int FileJournal::peek_fsid(uuid_d& fsid)
{
  int r = _open(false, false);
  if (r < 0)
    return r;

  r = read_header();
  if (r >= 0) {
    fsid = header.fsid;
    r = 0;
  }

  // _open() succeeded, so the journal fd is open; release it on both the
  // success and the read_header() failure path so a peek does not leak it.
  // NOTE(review): assumes -1 is the "not open" sentinel for fd — confirm.
  ::close(fd);
  fd = -1;
  return r;
}
int FileJournal::open(uint64_t fs_op_seq)
{
dout(2) << "open " << fn << " fs_op_seq " << fs_op_seq << dendl;
dout(2) << "open " << fn << " fsid " << fsid << " fs_op_seq " << fs_op_seq << dendl;
last_committed_seq = fs_op_seq;
uint64_t next_seq = fs_op_seq + 1;
@ -417,8 +429,8 @@ int FileJournal::open(uint64_t fs_op_seq)
//<< " vs expected fsid = " << fsid
<< dendl;
if (header.fsid != fsid) {
derr << "FileJournal::open: open fsid doesn't match, invalid "
<< "(someone else's?) journal" << dendl;
derr << "FileJournal::open: ondisk fsid " << header.fsid << " doesn't match expected " << fsid
<< ", invalid (someone else's?) journal" << dendl;
return -EINVAL;
}
if (header.max_size > max_size) {
@ -523,17 +535,23 @@ void FileJournal::print_header()
int FileJournal::read_header()
{
int r;
dout(10) << "read_header" << dendl;
if (directio) {
buffer::ptr bp = buffer::create_page_aligned(block_size);
bp.zero();
r = ::pread(fd, bp.c_str(), bp.length(), 0);
memcpy(&header, bp.c_str(), sizeof(header));
} else {
memset(&header, 0, sizeof(header)); // zero out (read may fail)
r = ::pread(fd, &header, sizeof(header), 0);
bufferlist bl;
buffer::ptr bp = buffer::create_page_aligned(block_size);
bp.zero();
int r = ::pread(fd, bp.c_str(), bp.length(), 0);
bl.push_back(bp);
try {
bufferlist::iterator p = bl.begin();
::decode(header, p);
}
catch (buffer::error& e) {
derr << "read_header error decoding journal header" << dendl;
return -EINVAL;
}
if (r < 0) {
char buf[80];
dout(0) << "read_header error " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl;
@ -546,9 +564,11 @@ int FileJournal::read_header()
/**
 * Build the buffer holding the on-disk image of the journal header.
 *
 * The header is serialized with ::encode() and copied to the start of a
 * zero-filled, page-aligned buffer of get_top() bytes.  Only the encoded
 * form is written: copying the raw in-memory struct first would be
 * overwritten anyway and could leave raw struct bytes behind the encoded
 * header.
 *
 * @return the page-aligned buffer containing the encoded header
 */
bufferptr FileJournal::prepare_header()
{
  bufferlist bl;
  ::encode(header, bl);
  bufferptr bp = buffer::create_page_aligned(get_top());
  bp.zero();
  // NOTE(review): assumes the encoded header fits in get_top() bytes — confirm
  memcpy(bp.c_str(), bl.c_str(), bl.length());
  return bp;
}
@ -728,7 +748,7 @@ int FileJournal::prepare_single_write(bufferlist& bl, off64_t& queue_pos, uint64
h.pre_pad = pre_pad;
h.len = ebl.length();
h.post_pad = post_pad;
h.make_magic(queue_pos, header.fsid);
h.make_magic(queue_pos, header.get_fsid64());
bl.append((const char*)&h, sizeof(h));
if (pre_pad) {
@ -1144,7 +1164,7 @@ bool FileJournal::read_entry(bufferlist& bl, uint64_t& seq)
wrap_read_bl(pos, sizeof(*h), hbl);
h = (entry_header_t *)hbl.c_str();
if (!h->check_magic(read_pos, header.fsid)) {
if (!h->check_magic(read_pos, header.get_fsid64())) {
dout(2) << "read_entry " << read_pos << " : bad header magic, end of journal" << dendl;
return false;
}

View File

@ -31,20 +31,63 @@ public:
* journal header
*/
struct header_t {
__u32 version;
__u32 flags;
uint64_t fsid;
uint64_t flags;
uuid_d fsid;
__u32 block_size;
__u32 alignment;
int64_t max_size; // max size of journal ring buffer
int64_t start; // offset of first entry
header_t() : version(1), flags(0), fsid(0), block_size(0), alignment(0), max_size(0), start(0) {}
void clear() {
start = block_size;
}
} header __attribute__((__packed__, aligned(4)));
// Return the first 8 bytes of the fsid uuid read as a native 64-bit
// integer (the value passed to entry_header_t make_magic/check_magic).
uint64_t get_fsid64() {
  uint64_t *first_half = (uint64_t*)&fsid.uuid[0];
  return *first_half;
}
// Serialize the header: a __u32 struct version (2), followed by a nested
// bufferlist carrying flags, fsid, block_size, alignment, max_size and
// start, in that order.
void encode(bufferlist& bl) const {
  __u32 struct_v = 2;
  ::encode(struct_v, bl);

  // fields go into their own bufferlist, which is then encoded as one unit
  bufferlist payload;
  ::encode(flags, payload);
  ::encode(fsid, payload);
  ::encode(block_size, payload);
  ::encode(alignment, payload);
  ::encode(max_size, payload);
  ::encode(start, payload);

  ::encode(payload, bl);
}
// Deserialize the header.  A leading version >= 2 selects the current
// format (fields wrapped in a nested bufferlist); anything lower is
// treated as the old fixed-layout header_t struct.
void decode(bufferlist::iterator& bl) {
  __u32 struct_v;
  ::decode(struct_v, bl);

  if (struct_v >= 2) {
    bufferlist payload;
    ::decode(payload, bl);
    bufferlist::iterator p = payload.begin();
    ::decode(flags, p);
    ::decode(fsid, p);
    ::decode(block_size, p);
    ::decode(alignment, p);
    ::decode(max_size, p);
    ::decode(start, p);
  } else {
    // version is normally 0, but conceivably 1:
    // decode old header_t struct (pre v0.40).
    bl.advance(4);  // skip __u32 flags (it was unused by any old code)
    flags = 0;
    uint64_t legacy_fsid;
    ::decode(legacy_fsid, bl);
    // old fsid was 64 bits; mirror it into both halves of the uuid
    *(uint64_t*)&fsid.uuid[0] = legacy_fsid;
    *(uint64_t*)&fsid.uuid[8] = legacy_fsid;
    ::decode(block_size, bl);
    ::decode(alignment, bl);
    ::decode(max_size, bl);
    ::decode(start, bl);
  }
}
} header;
struct entry_header_t {
uint64_t seq; // fs op seq #
@ -171,7 +214,7 @@ private:
}
public:
FileJournal(uint64_t fsid, Finisher *fin, Cond *sync_cond, const char *f, bool dio=false) :
FileJournal(uuid_d fsid, Finisher *fin, Cond *sync_cond, const char *f, bool dio=false) :
Journal(fsid, fin, sync_cond), fn(f),
zero_buf(NULL),
max_size(0), block_size(0),
@ -193,6 +236,7 @@ private:
int create();
int open(uint64_t fs_op_seq);
void close();
int peek_fsid(uuid_d& fsid);
void flush();
@ -217,4 +261,6 @@ private:
bool read_entry(bufferlist& bl, uint64_t& seq);
};
WRITE_CLASS_ENCODER(FileJournal::header_t)
#endif

View File

@ -584,7 +584,6 @@ done:
FileStore::FileStore(const std::string &base, const std::string &jdev) :
basedir(base), journalpath(jdev),
fsid(0),
btrfs(false),
btrfs_stable_commits(false),
blk_size(0),
@ -892,10 +891,13 @@ int FileStore::mkfs()
}
// fsid
srand(time(0) + getpid());
fsid = rand();
ret = safe_write(fsid_fd, &fsid, sizeof(fsid));
if (ret) {
fsid.generate_random();
char fsid_str[40];
fsid.print(fsid_str);
strcat(fsid_str, "\n");
ret = safe_write(fsid_fd, fsid_str, strlen(fsid_str));
if (ret < 0) {
derr << "FileStore::mkfs: failed to write fsid: "
<< cpp_strerror(ret) << dendl;
goto close_fsid_fd;
@ -1002,7 +1004,7 @@ int FileStore::mkjournal()
derr << "FileStore::mkjournal: open error: " << cpp_strerror(err) << dendl;
return -err;
}
ret = safe_read(fd, &fsid, sizeof(fsid));
ret = read_fsid(fd);
if (ret < 0) {
derr << "FileStore::mkjournal: read error: " << cpp_strerror(ret) << dendl;
TEMP_FAILURE_RETRY(::close(fd));
@ -1029,6 +1031,26 @@ int FileStore::mkjournal()
return ret;
}
/**
 * Read the store fsid from an open fsid file into the `fsid` member.
 *
 * Two on-disk forms are accepted:
 *  - exactly 8 bytes: an old raw 64-bit fsid, mirrored into both halves
 *    of the uuid;
 *  - otherwise: a textual uuid, of which at most the first 36 characters
 *    are parsed (a trailing newline is ignored).
 *
 * @param fd  file descriptor to read the fsid from
 * @return 0 on success, the negative value returned by safe_read() on a
 *         read error, or -EINVAL if the text does not parse as a uuid
 */
int FileStore::read_fsid(int fd)
{
  char fsid_str[40];
  memset(fsid_str, 0, sizeof(fsid_str));
  int ret = safe_read(fd, fsid_str, sizeof(fsid_str));
  if (ret < 0)
    return ret;
  if (ret == 8) {
    // old 64-bit fsid... mirror it.  memcpy avoids an unaligned/aliasing
    // access through a uint64_t* into the char buffer.
    memcpy(&fsid.uuid[0], fsid_str, sizeof(uint64_t));
    memcpy(&fsid.uuid[8], fsid_str, sizeof(uint64_t));
    return 0;
  }

  // Always NUL-terminate before parsing: cut at 36 characters for a full
  // read, or at the number of bytes actually read for a short file, so
  // parse() never walks into unread stack bytes.
  if (ret > 36)
    fsid_str[36] = 0;
  else
    fsid_str[ret] = 0;
  if (!fsid.parse(fsid_str))
    return -EINVAL;
  return 0;
}
int FileStore::lock_fsid()
{
struct flock l;
@ -1433,7 +1455,7 @@ int FileStore::mount()
// get fsid
snprintf(buf, sizeof(buf), "%s/fsid", basedir.c_str());
fsid_fd = ::open(buf, O_RDWR|O_CREAT, 0644);
fsid_fd = ::open(buf, O_RDWR, 0644);
if (fsid_fd < 0) {
ret = -errno;
derr << "FileStore::mount: error opening '" << buf << "': "
@ -1441,9 +1463,8 @@ int FileStore::mount()
goto done;
}
fsid = 0;
ret = safe_read_exact(fsid_fd, &fsid, sizeof(fsid));
if (ret) {
ret = read_fsid(fsid_fd);
if (ret < 0) {
derr << "FileStore::mount: error reading fsid_fd: " << cpp_strerror(ret)
<< dendl;
goto close_fsid_fd;
@ -1725,7 +1746,6 @@ close_basedir_fd:
TEMP_FAILURE_RETRY(::close(basedir_fd));
basedir_fd = -1;
close_fsid_fd:
fsid = 0;
TEMP_FAILURE_RETRY(::close(fsid_fd));
fsid_fd = -1;
done:

View File

@ -28,6 +28,8 @@
#include "Fake.h"
#include "include/uuid.h"
#include <map>
#include <deque>
using namespace std;
@ -45,7 +47,7 @@ class FileStore : public JournalingObjectStore,
string basedir, journalpath;
std::string current_fn;
std::string current_op_seq_fn;
uint64_t fsid;
uuid_d fsid;
bool btrfs; ///< fs is btrfs
bool btrfs_stable_commits; ///< we are using btrfs snapshots for a stable journal reference
@ -80,6 +82,7 @@ class FileStore : public JournalingObjectStore,
// helper fns
int get_cdir(coll_t cid, char *s, int len);
int read_fsid(int fd);
int lock_fsid();
// sync thread
@ -320,6 +323,8 @@ public:
void flush();
void sync_and_flush();
/// Return a copy of this store's fsid (uuid).
uuid_d get_fsid() { return fsid; }
int snapshot(const string& name);
// attrs

View File

@ -24,7 +24,7 @@ class PerfCounters;
class Journal {
protected:
uint64_t fsid;
uuid_d fsid;
Finisher *finisher;
public:
PerfCounters *logger;
@ -33,7 +33,7 @@ protected:
bool wait_on_full;
public:
Journal(uint64_t f, Finisher *fin, Cond *c=0) :
Journal(uuid_d f, Finisher *fin, Cond *c=0) :
fsid(f), finisher(fin), logger(NULL),
do_sync_cond(c),
wait_on_full(false) { }

View File

@ -665,6 +665,7 @@ public:
virtual void _fake_writes(bool b) {};
virtual void _get_frag_stat(FragmentationStat& st) {};
virtual uuid_d get_fsid() = 0;
};

View File

@ -35,6 +35,7 @@
#include "common/ceph_argparse.h"
#include "os/FileStore.h"
#include "os/FileJournal.h"
#include "ReplicatedPG.h"
@ -263,7 +264,8 @@ int OSD::mkfs(const std::string &dev, const std::string &jdev, uuid_d fsid, int
int ret;
ObjectStore *store = NULL;
OSDSuperblock sb;
sb.fsid = fsid;
sb.cluster_fsid = fsid;
sb.whoami = whoami;
try {
@ -277,13 +279,15 @@ int OSD::mkfs(const std::string &dev, const std::string &jdev, uuid_d fsid, int
derr << "OSD::mkfs: FileStore::mkfs failed with error " << ret << dendl;
goto free_store;
}
sb.osd_fsid = store->get_fsid();
ret = store->mount();
if (ret) {
derr << "OSD::mkfs: couldn't mount FileStore: error " << ret << dendl;
goto free_store;
}
store->sync_and_flush();
ret = write_meta(dev, fsid, whoami);
ret = write_meta(dev, sb.cluster_fsid, sb.osd_fsid, whoami);
if (ret) {
derr << "OSD::mkfs: failed to write fsid file: error " << ret << dendl;
goto umount_store;
@ -443,14 +447,7 @@ int OSD::read_meta(const std::string &base, const std::string &file,
return len;
}
#define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \
"%02x%02x%02x%02x%02x%02x"
#define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \
(f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \
(f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \
(f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15]
int OSD::write_meta(const std::string &base, uuid_d& fsid, int whoami)
int OSD::write_meta(const std::string &base, uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami)
{
char val[80];
@ -460,15 +457,15 @@ int OSD::write_meta(const std::string &base, uuid_d& fsid, int whoami)
snprintf(val, sizeof(val), "%d\n", whoami);
write_meta(base, "whoami", val, strlen(val));
fsid.print(val);
cluster_fsid.print(val);
strcat(val, "\n");
write_meta(base, "ceph_fsid", val, strlen(val));
return 0;
}
int OSD::peek_meta(const std::string &dev, std::string& magic,
uuid_d& fsid, int& whoami)
uuid_d& cluster_fsid, uuid_d& osd_fsid, int& whoami)
{
char val[80] = { 0 };
@ -485,12 +482,28 @@ int OSD::peek_meta(const std::string &dev, std::string& magic,
if (read_meta(dev, "ceph_fsid", val, sizeof(val)) < 0)
return -errno;
memset(&fsid, 0, sizeof(fsid));
fsid.parse(val);
if (strlen(val) > 36)
val[36] = 0;
cluster_fsid.parse(val);
if (read_meta(dev, "fsid", val, sizeof(val)) < 0)
osd_fsid = uuid_d();
else {
if (strlen(val) > 36)
val[36] = 0;
osd_fsid.parse(val);
}
return 0;
}
/**
 * Read the fsid recorded in the journal at `path`.
 *
 * @param path  journal path handed to FileJournal
 * @param fsid  passed to the FileJournal constructor and then filled in by
 *              FileJournal::peek_fsid()
 * @return whatever FileJournal::peek_fsid() returns
 */
int OSD::peek_journal_fsid(string path, uuid_d& fsid)
{
  const char *journal_path = path.c_str();
  // no finisher and no sync cond are supplied to this journal object
  FileJournal journal(fsid, 0, 0, journal_path);
  int result = journal.peek_fsid(fsid);
  return result;
}
#undef dout_prefix
#define dout_prefix _prefix(_dout, whoami, osdmap)
@ -1530,9 +1543,9 @@ void OSD::reset_heartbeat_peers()
void OSD::handle_osd_ping(MOSDPing *m)
{
if (superblock.fsid != m->fsid) {
if (superblock.cluster_fsid != m->fsid) {
dout(20) << "handle_osd_ping from " << m->get_source_inst()
<< " bad fsid " << m->fsid << " != " << superblock.fsid << dendl;
<< " bad fsid " << m->fsid << " != " << superblock.cluster_fsid << dendl;
m->put();
return;
}

View File

@ -1056,9 +1056,12 @@ private:
const char *val, size_t vallen);
static int read_meta(const std::string &base, const std::string &file,
char *val, size_t vallen);
static int write_meta(const std::string &base, uuid_d& fsid, int whoami);
static int write_meta(const std::string &base,
uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami);
public:
static int peek_meta(const std::string &dev, string& magic, uuid_d& fsid, int& whoami);
static int peek_meta(const std::string &dev, string& magic,
uuid_d& cluster_fsid, uuid_d& osd_fsid, int& whoami);
static int peek_journal_fsid(std::string jpath, uuid_d& fsid);
// startup/shutdown

View File

@ -522,10 +522,10 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
void OSDSuperblock::encode(bufferlist &bl) const
{
__u8 v = 3;
__u8 v = 4;
::encode(v, bl);
::encode(fsid, bl);
::encode(cluster_fsid, bl);
::encode(whoami, bl);
::encode(current_epoch, bl);
::encode(oldest_map, bl);
@ -534,6 +534,7 @@ void OSDSuperblock::encode(bufferlist &bl) const
compat_features.encode(bl);
::encode(clean_thru, bl);
::encode(mounted, bl);
::encode(osd_fsid, bl);
}
void OSDSuperblock::decode(bufferlist::iterator &bl)
@ -545,7 +546,7 @@ void OSDSuperblock::decode(bufferlist::iterator &bl)
string magic;
::decode(magic, bl);
}
::decode(fsid, bl);
::decode(cluster_fsid, bl);
::decode(whoami, bl);
::decode(current_epoch, bl);
::decode(oldest_map, bl);
@ -558,6 +559,8 @@ void OSDSuperblock::decode(bufferlist::iterator &bl)
}
::decode(clean_thru, bl);
::decode(mounted, bl);
if (v >= 4)
::decode(osd_fsid, bl);
}

View File

@ -1107,7 +1107,7 @@ inline ostream& operator<<(ostream& out, const ObjectExtent &ex)
class OSDSuperblock {
public:
uuid_d fsid;
uuid_d cluster_fsid, osd_fsid;
int32_t whoami; // my role in this fs.
epoch_t current_epoch; // most recent epoch
epoch_t oldest_map, newest_map; // oldest/newest maps we have.
@ -1123,7 +1123,6 @@ public:
whoami(-1),
current_epoch(0), oldest_map(0), newest_map(0), weight(0),
mounted(0), clean_thru(0) {
memset(&fsid, 0, sizeof(fsid));
}
void encode(bufferlist &bl) const;
@ -1133,8 +1132,9 @@ WRITE_CLASS_ENCODER(OSDSuperblock)
inline ostream& operator<<(ostream& out, const OSDSuperblock& sb)
{
return out << "sb(" << sb.fsid
return out << "sb(" << sb.cluster_fsid
<< " osd." << sb.whoami
<< " " << sb.osd_fsid
<< " e" << sb.current_epoch
<< " [" << sb.oldest_map << "," << sb.newest_map << "]"
<< " lci=[" << sb.mounted << "," << sb.clean_thru << "]"