Merge pull request #3379 from ceph/wip-mon-drop-conversion

mon: drop store conversion code

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2015-01-15 11:22:16 -08:00
commit 062d3b0215
3 changed files with 4 additions and 495 deletions

View File

@ -510,40 +510,10 @@ int main(int argc, const char **argv)
}
MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data);
Monitor::StoreConverter converter(g_conf->mon_data, store);
if (store->open(std::cerr) < 0) {
int needs_conversion = converter.needs_conversion();
if (needs_conversion < 0) {
if (needs_conversion == -ENOENT) {
derr << "monitor data directory at '" << g_conf->mon_data
<< "' is not empty but has no valid store nor legacy monitor"
<< " store." << dendl;
} else {
derr << "found errors while validating legacy unconverted"
<< " monitor store: " << cpp_strerror(needs_conversion) << dendl;
}
prefork.exit(1);
}
int ret = store->create_and_open(std::cerr);
if (ret < 0) {
derr << "failed to create new leveldb store" << dendl;
prefork.exit(1);
}
if (needs_conversion > 0) {
dout(0) << "converting monitor store, please do not interrupt..." << dendl;
int r = converter.convert();
if (r) {
derr << "failed to convert monitor store: " << cpp_strerror(r) << dendl;
prefork.exit(1);
}
}
} else if (converter.is_converting()) {
derr << "there is an on-going (maybe aborted?) conversion." << dendl;
derr << "you should check what happened" << dendl;
derr << "remove store.db to restart conversion" << dendl;
err = store->open(std::cerr);
if (err < 0) {
derr << "error opening mon data directory at '"
<< g_conf->mon_data << "': " << cpp_strerror(err) << dendl;
prefork.exit(1);
}

View File

@ -4442,387 +4442,3 @@ bool Monitor::ms_verify_authorizer(Connection *con, int peer_type,
}
return true;
}
#undef dout_prefix
#define dout_prefix *_dout
/**
 * Rewrite the on-disk compat set as the last step of a conversion.
 *
 * Loads the compat set stored under (MONITOR_NAME, COMPAT_SET_LOC) in the
 * k/v store, swaps the GV incompat feature for the SINGLE_PAXOS one and
 * queues the re-encoded set on the supplied transaction.  Nothing is
 * applied here; the caller owns applying @p t.
 *
 * @param t transaction that receives the updated compat set
 */
void Monitor::StoreConverter::_convert_finish_features(
    MonitorDBStore::TransactionRef t)
{
  dout(20) << __func__ << dendl;

  // the compat set must already be present and non-empty
  assert(db->exists(MONITOR_NAME, COMPAT_SET_LOC));
  bufferlist stored_bl;
  db->get(MONITOR_NAME, COMPAT_SET_LOC, stored_bl);
  assert(stored_bl.length());

  CompatSet compat;
  bufferlist::iterator cursor = stored_bl.begin();
  compat.decode(cursor);

  // GV must be set before we drop it ...
  assert(compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
  compat.incompat.remove(CEPH_MON_FEATURE_INCOMPAT_GV);
  assert(!compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));

  // ... and SINGLE_PAXOS takes its place
  compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS);
  assert(compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS));

  // encode into a fresh buffer rather than recycling the one we read into
  bufferlist updated_bl;
  compat.encode(updated_bl);
  dout(20) << __func__ << " new features " << compat << dendl;

  t->put(MONITOR_NAME, COMPAT_SET_LOC, updated_bl);
}
bool Monitor::StoreConverter::_check_gv_store()
{
dout(20) << __func__ << dendl;
if (!store->exists_bl_ss(COMPAT_SET_LOC, 0))
return false;
bufferlist features_bl;
store->get_bl_ss_safe(features_bl, COMPAT_SET_LOC, 0);
if (!features_bl.length()) {
dout(20) << __func__ << " on-disk features length is zero" << dendl;
return false;
}
CompatSet features;
bufferlist::iterator p = features_bl.begin();
features.decode(p);
return (features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
}
/**
 * Check whether the legacy monitor store must be converted.
 *
 * Mounts the legacy store, looks for its "magic" entry and, when present,
 * checks for the GV feature.  The legacy store is unmounted and released
 * before returning on every path that does not abort.
 *
 * @returns 0 if no legacy "magic" entry was found, 1 if a GV-format store
 *          was found and should be converted, or the negative error
 *          returned by mounting the legacy store.  A legacy store that has
 *          "magic" but is not GV triggers an assertion failure.
 */
int Monitor::StoreConverter::needs_conversion()
{
  dout(10) << "check if store needs conversion from legacy format" << dendl;
  _init();

  const int err = store->mount();
  if (err < 0) {
    if (err == -ENOENT) {
      derr << "unable to mount monitor store: "
           << cpp_strerror(err) << dendl;
    } else {
      derr << "it appears that another monitor is running: "
           << cpp_strerror(err) << dendl;
    }
    // nothing was mounted; just drop the legacy store handle
    _deinit();
    return err;
  }
  assert(err == 0);

  int ret = 0;
  if (store->exists_bl_ss("magic", 0)) {
    if (_check_gv_store()) {
      dout(1) << "found old GV monitor store format "
              << "-- should convert!" << dendl;
      ret = 1;
    } else {
      dout(0) << "Existing monitor store has not been converted "
              << "to 0.52 (bobtail) format" << dendl;
      assert(0 == "Existing store has not been converted to 0.52 format");
    }
  }

  // Keep the umount() call outside the assert expression so it still runs
  // in a build where assert() is compiled out.
  const int umount_err = store->umount();
  assert(umount_err == 0);
  (void)umount_err;

  _deinit();
  return ret;
}
/**
 * Convert the legacy monitor store into the k/v store.
 *
 * Mounts the legacy store, refuses to run if the k/v store already has a
 * ("mon_convert", "on_going") marker, and otherwise runs the conversion
 * steps in order: mark start, monitor keys, per-machine data, paxos
 * versions, mark finish.  The legacy store is unmounted and released
 * before returning, on both the success and the -EEXIST path.
 *
 * @returns 0 on success, -EEXIST if a previous conversion marker is found
 */
int Monitor::StoreConverter::convert()
{
  _init();

  // Keep the mount() call outside the assert expression so it still runs
  // in a build where assert() is compiled out.
  const int mount_err = store->mount();
  assert(mount_err == 0);
  (void)mount_err;

  if (db->exists("mon_convert", "on_going")) {
    dout(0) << __func__ << " found a mon store in mid-convertion; abort!"
            << dendl;
    // Undo mount()/_init() before bailing out; otherwise the legacy store
    // stays mounted and a later _init() would trip its assert(!store).
    store->umount();
    _deinit();
    return -EEXIST;
  }

  _mark_convert_start();
  _convert_monitor();
  _convert_machines();
  _convert_paxos();
  _mark_convert_finish();

  store->umount();
  _deinit();

  dout(0) << __func__ << " finished conversion" << dendl;
  return 0;
}
void Monitor::StoreConverter::_convert_monitor()
{
dout(10) << __func__ << dendl;
assert(store->exists_bl_ss("magic"));
assert(store->exists_bl_ss("keyring"));
assert(store->exists_bl_ss("feature_set"));
assert(store->exists_bl_ss("election_epoch"));
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
if (store->exists_bl_ss("joined")) {
version_t joined = store->get_int("joined");
tx->put(MONITOR_NAME, "joined", joined);
}
vector<string> keys;
keys.push_back("magic");
keys.push_back("feature_set");
keys.push_back("cluster_uuid");
vector<string>::iterator it;
for (it = keys.begin(); it != keys.end(); ++it) {
if (!store->exists_bl_ss((*it).c_str()))
continue;
bufferlist bl;
int r = store->get_bl_ss(bl, (*it).c_str(), 0);
assert(r > 0);
tx->put(MONITOR_NAME, *it, bl);
}
version_t election_epoch = store->get_int("election_epoch");
tx->put(MONITOR_NAME, "election_epoch", election_epoch);
assert(!tx->empty());
db->apply_transaction(tx);
dout(10) << __func__ << " finished" << dendl;
}
/**
 * Convert one legacy paxos machine into the k/v store.
 *
 * Reads the machine's first_committed/last_committed/accepted_pn/last_pn
 * from the legacy store, folds the two pn values into the converter-wide
 * maxima, then copies every existing version in
 * [first_committed, last_committed] into the k/v store, one transaction per
 * version.  When the legacy store has a "<machine>_gv" entry for a version,
 * the encoded per-version transaction is also stored under the "paxos"
 * prefix keyed by that gv (merged with any earlier transaction that mapped
 * to the same gv).  Finally the commit bounds and, if present and
 * non-empty, the "latest" full version are written.
 *
 * @param machine name of the legacy machine (also used as the k/v prefix)
 */
void Monitor::StoreConverter::_convert_machines(string machine)
{
dout(10) << __func__ << " " << machine << dendl;
version_t first_committed =
store->get_int(machine.c_str(), "first_committed");
version_t last_committed =
store->get_int(machine.c_str(), "last_committed");
version_t accepted_pn = store->get_int(machine.c_str(), "accepted_pn");
version_t last_pn = store->get_int(machine.c_str(), "last_pn");
// remember the highest pn values seen across all machines; they are
// written out later by _convert_paxos()
if (accepted_pn > highest_accepted_pn)
highest_accepted_pn = accepted_pn;
if (last_pn > highest_last_pn)
highest_last_pn = last_pn;
// the version -> gv mapping lives under "<machine>_gv" in the legacy store
string machine_gv(machine);
machine_gv.append("_gv");
bool has_gv = true;
if (!store->exists_bl_ss(machine_gv.c_str())) {
dout(1) << __func__ << " " << machine
<< " no gv dir '" << machine_gv << "'" << dendl;
has_gv = false;
}
for (version_t ver = first_committed; ver <= last_committed; ver++) {
// versions missing from the legacy store are skipped, not treated as errors
if (!store->exists_bl_sn(machine.c_str(), ver)) {
dout(20) << __func__ << " " << machine
<< " ver " << ver << " dne" << dendl;
continue;
}
bufferlist bl;
int r = store->get_bl_sn(bl, machine.c_str(), ver);
assert(r >= 0);
dout(20) << __func__ << " " << machine
<< " ver " << ver << " bl " << bl.length() << dendl;
// per-version transaction: the value itself plus a running last_committed
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
tx->put(machine, ver, bl);
tx->put(machine, "last_committed", ver);
if (has_gv && store->exists_bl_sn(machine_gv.c_str(), ver)) {
stringstream s;
s << ver;
string ver_str = s.str();
version_t gv = store->get_int(machine_gv.c_str(), ver_str.c_str());
dout(20) << __func__ << " " << machine
<< " ver " << ver << " -> " << gv << dendl;
// paxos_tx accumulates everything that will be stored under ("paxos", gv)
MonitorDBStore::TransactionRef paxos_tx(new MonitorDBStore::Transaction);
if (gvs.count(gv) == 0) {
gvs.insert(gv);
} else {
dout(0) << __func__ << " " << machine
<< " gv " << gv << " already exists"
<< dendl;
// Duplicates aren't supposed to happen, but an old bug introduced
// them and the mds state machine wasn't ever trimmed, so many users
// will see them. So we'll just merge them all in one
// single paxos version.
// We know that they are either from another paxos machine or
// they are from the same paxos machine but their version is
// lower than ours -- given that we are iterating all versions
// from the lowest to the highest, duh!
// We'll just append our stuff to the existing paxos transaction
// as if nothing had happened.
// Just make sure we are correct. This shouldn't take long and
// should never be triggered!
set<pair<string,version_t> >& s = gv_map[gv];
for (set<pair<string,version_t> >::iterator it = s.begin();
it != s.end(); ++it) {
if (it->first == machine)
assert(it->second + 1 == ver);
}
// start from what is already stored for this gv
bufferlist paxos_bl;
int r = db->get("paxos", gv, paxos_bl);
assert(r >= 0);
paxos_tx->append_from_encoded(paxos_bl);
}
gv_map[gv].insert(make_pair(machine,ver));
// tx is encoded *before* the "paxos" put below is added to it, so the
// stored paxos value holds only the per-machine puts
bufferlist tx_bl;
tx->encode(tx_bl);
paxos_tx->append_from_encoded(tx_bl);
bufferlist paxos_bl;
paxos_tx->encode(paxos_bl);
tx->put("paxos", gv, paxos_bl);
}
db->apply_transaction(tx);
}
// sanity check: the running last_committed written above must match the
// legacy store's value
version_t lc = db->get(machine, "last_committed");
dout(20) << __func__ << " lc " << lc << " last_committed " << last_committed << dendl;
assert(lc == last_committed);
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
tx->put(machine, "first_committed", first_committed);
tx->put(machine, "last_committed", last_committed);
tx->put(machine, "conversion_first", first_committed);
if (store->exists_bl_ss(machine.c_str(), "latest")) {
bufferlist latest_bl_raw;
int r = store->get_bl_ss(latest_bl_raw, machine.c_str(), "latest");
assert(r >= 0);
if (!latest_bl_raw.length()) {
dout(20) << __func__ << " machine " << machine
<< " skip latest with size 0" << dendl;
// still apply the commit bounds queued above
goto out;
}
tx->put(machine, "latest", latest_bl_raw);
// "latest" decodes as a version followed by a blob; both are also stored
// as "full_latest" and "full_<version>"
bufferlist::iterator lbl_it = latest_bl_raw.begin();
bufferlist latest_bl;
version_t latest_ver;
::decode(latest_ver, lbl_it);
::decode(latest_bl, lbl_it);
dout(20) << __func__ << " machine " << machine
<< " latest ver " << latest_ver << dendl;
tx->put(machine, "full_latest", latest_ver);
stringstream os;
os << "full_" << latest_ver;
tx->put(machine, os.str(), latest_bl);
}
out:
db->apply_transaction(tx);
dout(10) << __func__ << " machine " << machine << " finished" << dendl;
}
void Monitor::StoreConverter::_convert_osdmap_full()
{
dout(10) << __func__ << dendl;
version_t first_committed =
store->get_int("osdmap", "first_committed");
version_t last_committed =
store->get_int("osdmap", "last_committed");
int err = 0;
for (version_t ver = first_committed; ver <= last_committed; ver++) {
if (!store->exists_bl_sn("osdmap_full", ver)) {
dout(20) << __func__ << " osdmap_full ver " << ver << " dne" << dendl;
err++;
continue;
}
bufferlist bl;
int r = store->get_bl_sn(bl, "osdmap_full", ver);
assert(r >= 0);
dout(20) << __func__ << " osdmap_full ver " << ver
<< " bl " << bl.length() << " bytes" << dendl;
string full_key = "full_" + stringify(ver);
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
tx->put("osdmap", full_key, bl);
db->apply_transaction(tx);
}
dout(10) << __func__ << " found " << err << " conversion errors!" << dendl;
assert(err == 0);
}
void Monitor::StoreConverter::_convert_paxos()
{
dout(10) << __func__ << dendl;
assert(!gvs.empty());
set<version_t>::reverse_iterator rit = gvs.rbegin();
version_t highest_gv = *rit;
version_t last_gv = highest_gv;
int n = 0;
int max_versions = (g_conf->paxos_max_join_drift*2);
for (; (rit != gvs.rend()) && (n < max_versions); ++rit, ++n) {
version_t gv = *rit;
if (last_gv == gv)
continue;
if ((last_gv - gv) > 1) {
// we are done; we found a gap and we are only interested in keeping
// contiguous paxos versions.
break;
}
last_gv = gv;
}
// erase all paxos versions between [first, last_gv[, with first being the
// first gv in the map.
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
set<version_t>::iterator it = gvs.begin();
dout(1) << __func__ << " first gv " << (*it)
<< " last gv " << last_gv << dendl;
for (; it != gvs.end() && (*it < last_gv); ++it) {
tx->erase("paxos", *it);
}
tx->put("paxos", "first_committed", last_gv);
tx->put("paxos", "last_committed", highest_gv);
tx->put("paxos", "accepted_pn", highest_accepted_pn);
tx->put("paxos", "last_pn", highest_last_pn);
tx->put("paxos", "conversion_first", last_gv);
db->apply_transaction(tx);
dout(10) << __func__ << " finished" << dendl;
}
void Monitor::StoreConverter::_convert_machines()
{
dout(10) << __func__ << dendl;
set<string> machine_names = _get_machines_names();
set<string>::iterator it = machine_names.begin();
for (; it != machine_names.end(); ++it) {
_convert_machines(*it);
}
// convert osdmap full versions
// this stays here as these aren't really an independent paxos
// machine, but rather machine-specific and don't fit on the
// _convert_machines(string) function.
_convert_osdmap_full();
dout(10) << __func__ << " finished" << dendl;
}

View File

@ -852,83 +852,6 @@ private:
Monitor& operator=(const Monitor &rhs);
public:
class StoreConverter {
const string path;
MonitorDBStore *db;
boost::scoped_ptr<MonitorStore> store;
set<version_t> gvs;
map<version_t, set<pair<string,version_t> > > gv_map;
version_t highest_last_pn;
version_t highest_accepted_pn;
public:
StoreConverter(string path, MonitorDBStore *d)
: path(path), db(d), store(NULL),
highest_last_pn(0), highest_accepted_pn(0)
{ }
/**
* Check if store needs to be converted from old format to a
* k/v store.
*
* @returns 0 if store doesn't need conversion; 1 if it does; <0 if error
*/
int needs_conversion();
int convert();
bool is_converting() {
return db->exists("mon_convert", "on_going");
}
private:
bool _check_gv_store();
void _init() {
assert(!store);
MonitorStore *store_ptr = new MonitorStore(path);
store.reset(store_ptr);
}
void _deinit() {
store.reset(NULL);
}
set<string> _get_machines_names() {
set<string> names;
names.insert("auth");
names.insert("logm");
names.insert("mdsmap");
names.insert("monmap");
names.insert("osdmap");
names.insert("pgmap");
return names;
}
void _mark_convert_start() {
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
tx->put("mon_convert", "on_going", 1);
db->apply_transaction(tx);
}
void _convert_finish_features(MonitorDBStore::TransactionRef t);
void _mark_convert_finish() {
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
tx->erase("mon_convert", "on_going");
_convert_finish_features(tx);
db->apply_transaction(tx);
}
void _convert_monitor();
void _convert_machines(string machine);
void _convert_osdmap_full();
void _convert_machines();
void _convert_paxos();
};
static void format_command_descriptions(const MonCommand *commands,
unsigned commands_size,
Formatter *f,