Merge pull request #27465 from tchaikov/wip-38219

ceph-monstore-tool: use a large enough paxos/{first,last}_committed

Reviewed-by: Neha Ojha <nojha@redhat.com>
This commit is contained in:
Kefu Chai 2021-06-16 09:38:45 +08:00 committed by GitHub
commit 6f58a26281
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 58 deletions

View File

@ -441,7 +441,9 @@ deploy at least three (and preferably five) monitors in a Ceph cluster, the chan
failure is rare. But unplanned power-downs in a data center with improperly
configured disk/fs settings could fail the underlying file system, and hence
kill all the monitors. In this case, we can recover the monitor store with the
information stored in OSDs.::
information stored in OSDs.
.. code-block:: bash
ms=/root/mon-store
mkdir $ms

View File

@ -3,6 +3,7 @@ ceph manager -- Thrasher and CephManager objects
"""
from functools import wraps
import contextlib
import errno
import random
import signal
import time
@ -3091,13 +3092,22 @@ class CephManager:
Loop until quorum size is reached.
"""
self.log('waiting for quorum size %d' % size)
start = time.time()
while not len(self.get_mon_quorum()) == size:
if timeout is not None:
assert time.time() - start < timeout, \
('failed to reach quorum size %d '
'before timeout expired' % size)
time.sleep(3)
sleep = 3
with safe_while(sleep=sleep,
tries=timeout // sleep,
action=f'wait for quorum size {size}') as proceed:
while proceed():
try:
if len(self.get_mon_quorum()) == size:
break
except CommandFailedError as e:
# could fail instead of blocking if the rotating key of the
# connected monitor is not updated yet after they form the
# quorum
if e.exitstatus == errno.EACCES:
pass
else:
raise
self.log("quorum is size %d" % size)
def get_mon_health(self, debug=False):

View File

@ -23,6 +23,7 @@
#include "auth/KeyRing.h"
#include "auth/cephx/CephxKeyServer.h"
#include "global/global_init.h"
#include "include/scope_guard.h"
#include "include/stringify.h"
#include "mgr/mgr_commands.h"
#include "mon/AuthMonitor.h"
@ -640,6 +641,24 @@ static int update_mgrmap(MonitorDBStore& st)
static int update_paxos(MonitorDBStore& st)
{
const string prefix("paxos");
// a large enough version greater than the maximum possible `last_committed`
// that could be replied by the peons when the leader is collecting paxos
// transactions during recovery
constexpr version_t first_committed = 0x42;
constexpr version_t last_committed = first_committed;
for (version_t v = first_committed; v < last_committed + 1; v++) {
auto t = make_shared<MonitorDBStore::Transaction>();
if (v == first_committed) {
t->put(prefix, "first_committed", v);
}
bufferlist proposal;
MonitorDBStore::Transaction empty_txn;
empty_txn.encode(proposal);
t->put(prefix, v, proposal);
t->put(prefix, "last_committed", v);
st.apply_transaction(t);
}
// build a pending paxos proposal from all non-permanent k/v pairs. once the
// proposal is committed, it will get applied. on the sync provider side, it
// will be a no-op, but on its peers, the paxos commit will help to build up
@ -658,11 +677,8 @@ static int update_paxos(MonitorDBStore& st)
}
t.encode(pending_proposal);
}
const string prefix("paxos");
auto pending_v = last_committed + 1;
auto t = make_shared<MonitorDBStore::Transaction>();
t->put(prefix, "first_committed", 0);
t->put(prefix, "last_committed", 0);
auto pending_v = 1;
t->put(prefix, pending_v, pending_proposal);
t->put(prefix, "pending_v", pending_v);
t->put(prefix, "pending_pn", 400);
@ -821,6 +837,10 @@ int main(int argc, char **argv) {
}
}
auto close_store = make_scope_guard([&] {
st.close();
});
if (cmd == "dump-keys") {
KeyValueDB::WholeSpaceIterator iter = st.get_iterator();
while (iter->valid()) {
@ -859,14 +879,12 @@ int main(int argc, char **argv) {
int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
subcmds, &op_vm);
if (r < 0) {
err = -r;
goto done;
return -r;
}
if (op_vm.count("help") || map_type.empty()) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
if (v == 0) {
@ -883,17 +901,16 @@ int main(int argc, char **argv) {
if (fd < 0) {
std::cerr << "error opening output file: "
<< cpp_strerror(errno) << std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
}
BOOST_SCOPE_EXIT((&r) (&fd) (&outpath)) {
auto close_fd = make_scope_guard([&] {
::close(fd);
if (r < 0 && fd != STDOUT_FILENO) {
::remove(outpath.c_str());
}
} BOOST_SCOPE_EXIT_END
});
bufferlist bl;
r = 0;
@ -912,8 +929,7 @@ int main(int argc, char **argv) {
}
if (r < 0) {
std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
if (op_vm.count("readable")) {
@ -981,14 +997,12 @@ int main(int argc, char **argv) {
int r = parse_cmd_args(&op_desc, NULL, &op_positional,
subcmds, &op_vm);
if (r < 0) {
err = -r;
goto done;
return -r;
}
if (op_vm.count("help") || map_type.empty()) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
unsigned int v_first = 0;
@ -1014,22 +1028,19 @@ int main(int argc, char **argv) {
int r = parse_cmd_args(&op_desc, NULL, NULL,
subcmds, &op_vm);
if (r < 0) {
err = -r;
goto done;
return -r;
}
if (op_vm.count("help")) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
if (dstart > dstop) {
std::cerr << "error: 'start' version (value: " << dstart << ") "
<< " is greater than 'end' version (value: " << dstop << ")"
<< std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
version_t v = dstart;
@ -1077,28 +1088,24 @@ int main(int argc, char **argv) {
int r = parse_cmd_args(&op_desc, &hidden_op_desc, &op_positional,
subcmds, &op_vm);
if (r < 0) {
err = -r;
goto done;
return -r;
}
if (op_vm.count("help")) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
if (outpath.empty()) {
usage(argv[0], op_desc);
err = EINVAL;
goto done;
return EINVAL;
}
if (dstart > dstop) {
std::cerr << "error: 'start' version (value: " << dstart << ") "
<< " is greater than 'stop' version (value: " << dstop << ")"
<< std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
TraceIter iter(outpath.c_str());
@ -1153,20 +1160,17 @@ int main(int argc, char **argv) {
po::notify(op_vm);
} catch (po::error &e) {
std::cerr << "error: " << e.what() << std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
if (op_vm.count("help")) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
if (inpath.empty()) {
usage(argv[0], op_desc);
err = EINVAL;
goto done;
return EINVAL;
}
unsigned num = 0;
@ -1206,14 +1210,12 @@ int main(int argc, char **argv) {
po::notify(op_vm);
} catch (po::error &e) {
std::cerr << "error: " << e.what() << std::endl;
err = EINVAL;
goto done;
return EINVAL;
}
if (op_vm.count("help")) {
usage(argv[0], op_desc);
err = 0;
goto done;
return 0;
}
unsigned num = 0;
@ -1236,8 +1238,7 @@ int main(int argc, char **argv) {
} else if (cmd == "store-copy") {
if (subcmds.size() < 1 || subcmds[0].empty()) {
usage(argv[0], desc);
err = EINVAL;
goto done;
return EINVAL;
}
string out_path = subcmds[0];
@ -1248,7 +1249,7 @@ int main(int argc, char **argv) {
int r = out_store.create_and_open(ss);
if (r < 0) {
std::cerr << ss.str() << std::endl;
goto done;
return err;
}
}
@ -1297,10 +1298,6 @@ int main(int argc, char **argv) {
} else {
std::cerr << "Unrecognized command: " << cmd << std::endl;
usage(argv[0], desc);
goto done;
return err;
}
done:
st.close();
return err;
}