mirror of
https://github.com/ceph/ceph
synced 2024-12-22 03:22:00 +00:00
Merge pull request #3379 from ceph/wip-mon-drop-conversion
mon: drop store conversion code
Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
commit
062d3b0215
@ -510,40 +510,10 @@ int main(int argc, const char **argv)
|
||||
}
|
||||
|
||||
MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data);
|
||||
|
||||
Monitor::StoreConverter converter(g_conf->mon_data, store);
|
||||
if (store->open(std::cerr) < 0) {
|
||||
int needs_conversion = converter.needs_conversion();
|
||||
if (needs_conversion < 0) {
|
||||
if (needs_conversion == -ENOENT) {
|
||||
derr << "monitor data directory at '" << g_conf->mon_data
|
||||
<< "' is not empty but has no valid store nor legacy monitor"
|
||||
<< " store." << dendl;
|
||||
} else {
|
||||
derr << "found errors while validating legacy unconverted"
|
||||
<< " monitor store: " << cpp_strerror(needs_conversion) << dendl;
|
||||
}
|
||||
prefork.exit(1);
|
||||
}
|
||||
|
||||
int ret = store->create_and_open(std::cerr);
|
||||
if (ret < 0) {
|
||||
derr << "failed to create new leveldb store" << dendl;
|
||||
prefork.exit(1);
|
||||
}
|
||||
|
||||
if (needs_conversion > 0) {
|
||||
dout(0) << "converting monitor store, please do not interrupt..." << dendl;
|
||||
int r = converter.convert();
|
||||
if (r) {
|
||||
derr << "failed to convert monitor store: " << cpp_strerror(r) << dendl;
|
||||
prefork.exit(1);
|
||||
}
|
||||
}
|
||||
} else if (converter.is_converting()) {
|
||||
derr << "there is an on-going (maybe aborted?) conversion." << dendl;
|
||||
derr << "you should check what happened" << dendl;
|
||||
derr << "remove store.db to restart conversion" << dendl;
|
||||
err = store->open(std::cerr);
|
||||
if (err < 0) {
|
||||
derr << "error opening mon data directory at '"
|
||||
<< g_conf->mon_data << "': " << cpp_strerror(err) << dendl;
|
||||
prefork.exit(1);
|
||||
}
|
||||
|
||||
|
@ -4442,387 +4442,3 @@ bool Monitor::ms_verify_authorizer(Connection *con, int peer_type,
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef dout_prefix
|
||||
#define dout_prefix *_dout
|
||||
|
||||
/**
 * Stage the post-conversion feature set in transaction @p t.
 *
 * Loads the compat set stored in the k/v store under
 * (MONITOR_NAME, COMPAT_SET_LOC), swaps the GV incompat feature for the
 * SINGLE_PAXOS one, and queues the re-encoded set for writing.  Nothing is
 * applied here; the caller is responsible for applying @p t.
 *
 * @param t transaction that receives the updated compat set
 */
void Monitor::StoreConverter::_convert_finish_features(
    MonitorDBStore::TransactionRef t)
{
  dout(20) << __func__ << dendl;

  // The compat set must already be present and non-empty in the k/v store.
  assert(db->exists(MONITOR_NAME, COMPAT_SET_LOC));
  bufferlist stored_bl;
  db->get(MONITOR_NAME, COMPAT_SET_LOC, stored_bl);
  assert(stored_bl.length());

  CompatSet compat;
  bufferlist::iterator cursor = stored_bl.begin();
  compat.decode(cursor);

  // Drop the GV marker; it must be set on entry and gone afterwards.
  assert(compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
  compat.incompat.remove(CEPH_MON_FEATURE_INCOMPAT_GV);
  assert(!compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));

  // Flag the store as using the single-paxos layout.
  compat.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS);
  assert(compat.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS));

  bufferlist updated_bl;
  compat.encode(updated_bl);

  dout(20) << __func__ << " new features " << compat << dendl;
  t->put(MONITOR_NAME, COMPAT_SET_LOC, updated_bl);
}
|
||||
|
||||
|
||||
bool Monitor::StoreConverter::_check_gv_store()
|
||||
{
|
||||
dout(20) << __func__ << dendl;
|
||||
if (!store->exists_bl_ss(COMPAT_SET_LOC, 0))
|
||||
return false;
|
||||
|
||||
bufferlist features_bl;
|
||||
store->get_bl_ss_safe(features_bl, COMPAT_SET_LOC, 0);
|
||||
if (!features_bl.length()) {
|
||||
dout(20) << __func__ << " on-disk features length is zero" << dendl;
|
||||
return false;
|
||||
}
|
||||
CompatSet features;
|
||||
bufferlist::iterator p = features_bl.begin();
|
||||
features.decode(p);
|
||||
return (features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
|
||||
}
|
||||
|
||||
int Monitor::StoreConverter::needs_conversion()
|
||||
{
|
||||
bufferlist magicbl;
|
||||
int ret = 0;
|
||||
|
||||
dout(10) << "check if store needs conversion from legacy format" << dendl;
|
||||
_init();
|
||||
|
||||
int err = store->mount();
|
||||
if (err < 0) {
|
||||
if (err == -ENOENT) {
|
||||
derr << "unable to mount monitor store: "
|
||||
<< cpp_strerror(err) << dendl;
|
||||
} else {
|
||||
derr << "it appears that another monitor is running: "
|
||||
<< cpp_strerror(err) << dendl;
|
||||
}
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
assert(err == 0);
|
||||
|
||||
if (store->exists_bl_ss("magic", 0)) {
|
||||
if (_check_gv_store()) {
|
||||
dout(1) << "found old GV monitor store format "
|
||||
<< "-- should convert!" << dendl;
|
||||
ret = 1;
|
||||
} else {
|
||||
dout(0) << "Existing monitor store has not been converted "
|
||||
<< "to 0.52 (bobtail) format" << dendl;
|
||||
assert(0 == "Existing store has not been converted to 0.52 format");
|
||||
}
|
||||
}
|
||||
assert(!store->umount());
|
||||
|
||||
out:
|
||||
_deinit();
|
||||
return ret;
|
||||
}
|
||||
|
||||
int Monitor::StoreConverter::convert()
|
||||
{
|
||||
_init();
|
||||
assert(!store->mount());
|
||||
if (db->exists("mon_convert", "on_going")) {
|
||||
dout(0) << __func__ << " found a mon store in mid-convertion; abort!"
|
||||
<< dendl;
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
_mark_convert_start();
|
||||
_convert_monitor();
|
||||
_convert_machines();
|
||||
_convert_paxos();
|
||||
_mark_convert_finish();
|
||||
|
||||
store->umount();
|
||||
_deinit();
|
||||
|
||||
dout(0) << __func__ << " finished conversion" << dendl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Monitor::StoreConverter::_convert_monitor()
|
||||
{
|
||||
dout(10) << __func__ << dendl;
|
||||
|
||||
assert(store->exists_bl_ss("magic"));
|
||||
assert(store->exists_bl_ss("keyring"));
|
||||
assert(store->exists_bl_ss("feature_set"));
|
||||
assert(store->exists_bl_ss("election_epoch"));
|
||||
|
||||
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
|
||||
|
||||
if (store->exists_bl_ss("joined")) {
|
||||
version_t joined = store->get_int("joined");
|
||||
tx->put(MONITOR_NAME, "joined", joined);
|
||||
}
|
||||
|
||||
vector<string> keys;
|
||||
keys.push_back("magic");
|
||||
keys.push_back("feature_set");
|
||||
keys.push_back("cluster_uuid");
|
||||
|
||||
vector<string>::iterator it;
|
||||
for (it = keys.begin(); it != keys.end(); ++it) {
|
||||
if (!store->exists_bl_ss((*it).c_str()))
|
||||
continue;
|
||||
|
||||
bufferlist bl;
|
||||
int r = store->get_bl_ss(bl, (*it).c_str(), 0);
|
||||
assert(r > 0);
|
||||
tx->put(MONITOR_NAME, *it, bl);
|
||||
}
|
||||
version_t election_epoch = store->get_int("election_epoch");
|
||||
tx->put(MONITOR_NAME, "election_epoch", election_epoch);
|
||||
|
||||
assert(!tx->empty());
|
||||
db->apply_transaction(tx);
|
||||
dout(10) << __func__ << " finished" << dendl;
|
||||
}
|
||||
|
||||
/**
 * Convert one legacy paxos machine into the k/v store.
 *
 * For every version in [first_committed, last_committed] that exists in the
 * legacy store, the version blob is written under the machine's prefix.
 * When a matching entry exists in the "<machine>_gv" directory, the same
 * write is also folded into the "paxos" entry keyed by that global version
 * (gv).  Afterwards the machine's first/last committed markers and, if
 * present and non-empty, its "latest" full state are written.
 *
 * Also raises highest_accepted_pn / highest_last_pn to this machine's
 * values when they are larger, and records each gv in gvs / gv_map.
 *
 * @param machine name of the legacy paxos machine (e.g. "osdmap")
 */
void Monitor::StoreConverter::_convert_machines(string machine)
{
  dout(10) << __func__ << " " << machine << dendl;

  const char *mname = machine.c_str();

  version_t first_committed = store->get_int(mname, "first_committed");
  version_t last_committed = store->get_int(mname, "last_committed");

  version_t accepted_pn = store->get_int(mname, "accepted_pn");
  version_t last_pn = store->get_int(mname, "last_pn");

  // Track the highest proposal numbers seen across all machines.
  if (highest_accepted_pn < accepted_pn) {
    highest_accepted_pn = accepted_pn;
  }
  if (highest_last_pn < last_pn) {
    highest_last_pn = last_pn;
  }

  const string machine_gv = machine + "_gv";
  const bool has_gv = store->exists_bl_ss(machine_gv.c_str());
  if (!has_gv) {
    dout(1) << __func__ << " " << machine
            << " no gv dir '" << machine_gv << "'" << dendl;
  }

  for (version_t ver = first_committed; ver <= last_committed; ver++) {
    if (!store->exists_bl_sn(mname, ver)) {
      dout(20) << __func__ << " " << machine
               << " ver " << ver << " dne" << dendl;
      continue;
    }

    bufferlist ver_bl;
    int r = store->get_bl_sn(ver_bl, mname, ver);
    assert(r >= 0);
    dout(20) << __func__ << " " << machine
             << " ver " << ver << " bl " << ver_bl.length() << dendl;

    MonitorDBStore::TransactionRef ver_tx(new MonitorDBStore::Transaction);
    ver_tx->put(machine, ver, ver_bl);
    ver_tx->put(machine, "last_committed", ver);

    if (has_gv && store->exists_bl_sn(machine_gv.c_str(), ver)) {
      // The gv for this version is stored under the version's decimal name.
      stringstream ver_ss;
      ver_ss << ver;
      string ver_str = ver_ss.str();

      version_t gv = store->get_int(machine_gv.c_str(), ver_str.c_str());
      dout(20) << __func__ << " " << machine
               << " ver " << ver << " -> " << gv << dendl;

      MonitorDBStore::TransactionRef paxos_tx(new MonitorDBStore::Transaction);

      if (gvs.count(gv) != 0) {
        dout(0) << __func__ << " " << machine
                << " gv " << gv << " already exists"
                << dendl;

        // Duplicate gvs are not supposed to exist, but an old bug created
        // them and the mds state machine was never trimmed, so many users
        // will hit this.  All duplicates get merged into one paxos version.
        // A duplicate comes either from another paxos machine or from this
        // machine at a lower version (versions are walked in ascending
        // order), so this version's data is simply appended to the paxos
        // transaction that is already stored.

        // Sanity check: should be quick and should never fire.
        set<pair<string,version_t> >& owners = gv_map[gv];
        for (set<pair<string,version_t> >::iterator o = owners.begin();
             o != owners.end(); ++o) {
          if (o->first == machine)
            assert(o->second + 1 == ver);
        }

        bufferlist prev_paxos_bl;
        int paxos_r = db->get("paxos", gv, prev_paxos_bl);
        assert(paxos_r >= 0);
        paxos_tx->append_from_encoded(prev_paxos_bl);
      } else {
        gvs.insert(gv);
      }
      gv_map[gv].insert(make_pair(machine, ver));

      // Fold this version's writes into the paxos entry for gv, then store
      // the combined paxos entry through the same per-version transaction.
      bufferlist ver_tx_bl;
      ver_tx->encode(ver_tx_bl);
      paxos_tx->append_from_encoded(ver_tx_bl);
      bufferlist paxos_bl;
      paxos_tx->encode(paxos_bl);
      ver_tx->put("paxos", gv, paxos_bl);
    }
    db->apply_transaction(ver_tx);
  }

  version_t lc = db->get(machine, "last_committed");
  dout(20) << __func__ << " lc " << lc << " last_committed " << last_committed << dendl;
  assert(lc == last_committed);

  MonitorDBStore::TransactionRef final_tx(new MonitorDBStore::Transaction);
  final_tx->put(machine, "first_committed", first_committed);
  final_tx->put(machine, "last_committed", last_committed);
  final_tx->put(machine, "conversion_first", first_committed);

  if (store->exists_bl_ss(mname, "latest")) {
    bufferlist latest_raw;
    int latest_r = store->get_bl_ss(latest_raw, mname, "latest");
    assert(latest_r >= 0);
    if (latest_raw.length() == 0) {
      dout(20) << __func__ << " machine " << machine
               << " skip latest with size 0" << dendl;
    } else {
      final_tx->put(machine, "latest", latest_raw);

      // "latest" decodes as a version followed by the full-state blob.
      bufferlist::iterator latest_it = latest_raw.begin();
      bufferlist latest_bl;
      version_t latest_ver;
      ::decode(latest_ver, latest_it);
      ::decode(latest_bl, latest_it);

      dout(20) << __func__ << " machine " << machine
               << " latest ver " << latest_ver << dendl;

      final_tx->put(machine, "full_latest", latest_ver);
      stringstream full_key;
      full_key << "full_" << latest_ver;
      final_tx->put(machine, full_key.str(), latest_bl);
    }
  }

  db->apply_transaction(final_tx);
  dout(10) << __func__ << " machine " << machine << " finished" << dendl;
}
|
||||
|
||||
void Monitor::StoreConverter::_convert_osdmap_full()
|
||||
{
|
||||
dout(10) << __func__ << dendl;
|
||||
version_t first_committed =
|
||||
store->get_int("osdmap", "first_committed");
|
||||
version_t last_committed =
|
||||
store->get_int("osdmap", "last_committed");
|
||||
|
||||
int err = 0;
|
||||
for (version_t ver = first_committed; ver <= last_committed; ver++) {
|
||||
if (!store->exists_bl_sn("osdmap_full", ver)) {
|
||||
dout(20) << __func__ << " osdmap_full ver " << ver << " dne" << dendl;
|
||||
err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
bufferlist bl;
|
||||
int r = store->get_bl_sn(bl, "osdmap_full", ver);
|
||||
assert(r >= 0);
|
||||
dout(20) << __func__ << " osdmap_full ver " << ver
|
||||
<< " bl " << bl.length() << " bytes" << dendl;
|
||||
|
||||
string full_key = "full_" + stringify(ver);
|
||||
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
|
||||
tx->put("osdmap", full_key, bl);
|
||||
db->apply_transaction(tx);
|
||||
}
|
||||
dout(10) << __func__ << " found " << err << " conversion errors!" << dendl;
|
||||
assert(err == 0);
|
||||
}
|
||||
|
||||
void Monitor::StoreConverter::_convert_paxos()
|
||||
{
|
||||
dout(10) << __func__ << dendl;
|
||||
assert(!gvs.empty());
|
||||
|
||||
set<version_t>::reverse_iterator rit = gvs.rbegin();
|
||||
version_t highest_gv = *rit;
|
||||
version_t last_gv = highest_gv;
|
||||
|
||||
int n = 0;
|
||||
int max_versions = (g_conf->paxos_max_join_drift*2);
|
||||
for (; (rit != gvs.rend()) && (n < max_versions); ++rit, ++n) {
|
||||
version_t gv = *rit;
|
||||
|
||||
if (last_gv == gv)
|
||||
continue;
|
||||
if ((last_gv - gv) > 1) {
|
||||
// we are done; we found a gap and we are only interested in keeping
|
||||
// contiguous paxos versions.
|
||||
break;
|
||||
}
|
||||
last_gv = gv;
|
||||
}
|
||||
|
||||
// erase all paxos versions between [first, last_gv[, with first being the
|
||||
// first gv in the map.
|
||||
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
|
||||
set<version_t>::iterator it = gvs.begin();
|
||||
dout(1) << __func__ << " first gv " << (*it)
|
||||
<< " last gv " << last_gv << dendl;
|
||||
for (; it != gvs.end() && (*it < last_gv); ++it) {
|
||||
tx->erase("paxos", *it);
|
||||
}
|
||||
tx->put("paxos", "first_committed", last_gv);
|
||||
tx->put("paxos", "last_committed", highest_gv);
|
||||
tx->put("paxos", "accepted_pn", highest_accepted_pn);
|
||||
tx->put("paxos", "last_pn", highest_last_pn);
|
||||
tx->put("paxos", "conversion_first", last_gv);
|
||||
db->apply_transaction(tx);
|
||||
|
||||
dout(10) << __func__ << " finished" << dendl;
|
||||
}
|
||||
|
||||
void Monitor::StoreConverter::_convert_machines()
|
||||
{
|
||||
dout(10) << __func__ << dendl;
|
||||
set<string> machine_names = _get_machines_names();
|
||||
set<string>::iterator it = machine_names.begin();
|
||||
|
||||
for (; it != machine_names.end(); ++it) {
|
||||
_convert_machines(*it);
|
||||
}
|
||||
// convert osdmap full versions
|
||||
// this stays here as these aren't really an independent paxos
|
||||
// machine, but rather machine-specific and don't fit on the
|
||||
// _convert_machines(string) function.
|
||||
_convert_osdmap_full();
|
||||
|
||||
dout(10) << __func__ << " finished" << dendl;
|
||||
}
|
||||
|
@ -852,83 +852,6 @@ private:
|
||||
Monitor& operator=(const Monitor &rhs);
|
||||
|
||||
public:
|
||||
class StoreConverter {
|
||||
const string path;
|
||||
MonitorDBStore *db;
|
||||
boost::scoped_ptr<MonitorStore> store;
|
||||
|
||||
set<version_t> gvs;
|
||||
map<version_t, set<pair<string,version_t> > > gv_map;
|
||||
|
||||
version_t highest_last_pn;
|
||||
version_t highest_accepted_pn;
|
||||
|
||||
public:
|
||||
StoreConverter(string path, MonitorDBStore *d)
|
||||
: path(path), db(d), store(NULL),
|
||||
highest_last_pn(0), highest_accepted_pn(0)
|
||||
{ }
|
||||
|
||||
/**
|
||||
* Check if store needs to be converted from old format to a
|
||||
* k/v store.
|
||||
*
|
||||
* @returns 0 if store doesn't need conversion; 1 if it does; <0 if error
|
||||
*/
|
||||
int needs_conversion();
|
||||
int convert();
|
||||
|
||||
bool is_converting() {
|
||||
return db->exists("mon_convert", "on_going");
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
bool _check_gv_store();
|
||||
|
||||
void _init() {
|
||||
assert(!store);
|
||||
MonitorStore *store_ptr = new MonitorStore(path);
|
||||
store.reset(store_ptr);
|
||||
}
|
||||
|
||||
void _deinit() {
|
||||
store.reset(NULL);
|
||||
}
|
||||
|
||||
set<string> _get_machines_names() {
|
||||
set<string> names;
|
||||
names.insert("auth");
|
||||
names.insert("logm");
|
||||
names.insert("mdsmap");
|
||||
names.insert("monmap");
|
||||
names.insert("osdmap");
|
||||
names.insert("pgmap");
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
void _mark_convert_start() {
|
||||
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
|
||||
tx->put("mon_convert", "on_going", 1);
|
||||
db->apply_transaction(tx);
|
||||
}
|
||||
|
||||
void _convert_finish_features(MonitorDBStore::TransactionRef t);
|
||||
void _mark_convert_finish() {
|
||||
MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
|
||||
tx->erase("mon_convert", "on_going");
|
||||
_convert_finish_features(tx);
|
||||
db->apply_transaction(tx);
|
||||
}
|
||||
|
||||
void _convert_monitor();
|
||||
void _convert_machines(string machine);
|
||||
void _convert_osdmap_full();
|
||||
void _convert_machines();
|
||||
void _convert_paxos();
|
||||
};
|
||||
|
||||
static void format_command_descriptions(const MonCommand *commands,
|
||||
unsigned commands_size,
|
||||
Formatter *f,
|
||||
|
Loading…
Reference in New Issue
Block a user