Merge pull request #14210 from liewegas/wip-osd-dup

os: allow offline conversion of filestore -> bluestore (or anything else)

Reviewed-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
Sage Weil 2017-04-06 09:51:18 -05:00 committed by GitHub
commit 092aa00654
12 changed files with 421 additions and 17 deletions

View File

@ -403,6 +403,8 @@ public:
OP_COLL_HINT = 40, // cid, type, bl
OP_TRY_RENAME = 41, // oldcid, oldoid, newoid
OP_COLL_SET_BITS = 42, // cid, bits
};
// Transaction hint type
@ -429,7 +431,8 @@ public:
};
__le64 expected_object_size; //OP_SETALLOCHINT
__le64 expected_write_size; //OP_SETALLOCHINT
__le32 split_bits; //OP_SPLIT_COLLECTION2
__le32 split_bits; //OP_SPLIT_COLLECTION2,OP_COLL_SET_BITS,
//OP_MKCOLL
__le32 split_rem; //OP_SPLIT_COLLECTION2
} __attribute__ ((packed)) ;
@ -677,6 +680,7 @@ public:
case OP_COLL_RMATTR:
case OP_COLL_SETATTRS:
case OP_COLL_HINT:
case OP_COLL_SET_BITS:
assert(op->cid < cm.size());
op->cid = cm[op->cid];
break;
@ -1392,6 +1396,16 @@ public:
data.ops++;
}
/// Queue an OP_COLL_SET_BITS op that records `bits` in the op's
/// split_bits field for collection `cid`.
void collection_set_bits(
  coll_t cid,
  int bits) {
  Op* const next_op = _get_next_op();
  next_op->op = OP_COLL_SET_BITS;
  next_op->cid = _get_coll_id(cid);
  next_op->split_bits = bits;
  // account for the newly appended op
  data.ops++;
}
/// Set allocation hint for an object
/// make 0 values(expected_object_size, expected_write_size) noops for all implementations
void set_alloc_hint(
@ -1836,12 +1850,11 @@ public:
* return the number of significant bits of the coll_t::pgid.
*
* This should return what the last create_collection or split_collection
* set. A lazy backend can choose not to store and report this (e.g.,
* FileStore).
* set. A legacy backend may return a negative error code (e.g., -EAGAIN)
* if the value is unavailable (because we upgraded from an older version,
* e.g., FileStore).
*/
virtual int collection_bits(const coll_t& c) {
return -EOPNOTSUPP;
}
virtual int collection_bits(const coll_t& c) = 0;
/**
* list contents of a collection that fall in the range [start, end) and no more than a specified many result

View File

@ -219,6 +219,15 @@ void ObjectStore::Transaction::dump(ceph::Formatter *f)
}
break;
case Transaction::OP_COLL_SET_BITS:
{
coll_t cid = i.get_cid(op->cid);
f->dump_string("op_name", "coll_set_bits");
f->dump_stream("collection") << cid;
f->dump_unsigned("bits", op->split_bits);
}
break;
case Transaction::OP_RMCOLL:
{
coll_t cid = i.get_cid(op->cid);

View File

@ -1851,7 +1851,7 @@ void FileStore::init_temp_collections()
temps.erase(temp);
} else {
dout(10) << __func__ << " creating " << temp << dendl;
r = _create_collection(temp, spos);
r = _create_collection(temp, 0, spos);
assert(r == 0);
}
}
@ -2743,11 +2743,19 @@ void FileStore::_do_transaction(
const coll_t &cid = i.get_cid(op->cid);
tracepoint(objectstore, mkcoll_enter, osr_name);
if (_check_replay_guard(cid, spos) > 0)
r = _create_collection(cid, spos);
r = _create_collection(cid, op->split_bits, spos);
tracepoint(objectstore, mkcoll_exit, r);
}
break;
case Transaction::OP_COLL_SET_BITS:
{
const coll_t &cid = i.get_cid(op->cid);
int bits = op->split_bits;
r = _collection_set_bits(cid, bits);
}
break;
case Transaction::OP_COLL_HINT:
{
const coll_t &cid = i.get_cid(op->cid);
@ -4794,6 +4802,53 @@ int FileStore::collection_empty(const coll_t& c, bool *empty)
tracepoint(objectstore, collection_empty_exit, *empty);
return 0;
}
/**
 * Store the bit width of collection `c` as the "bits" xattr on the path
 * returned by get_cdir() (presumably the collection directory).
 *
 * The value is written as a raw int32_t.
 *
 * @param c     collection to tag
 * @param bits  bit width to record
 * @return the result of chain_fsetxattr(), or -errno if the path cannot
 *         be opened
 */
int FileStore::_collection_set_bits(const coll_t& c, int bits)
{
  char fn[PATH_MAX];
  get_cdir(c, fn, sizeof(fn));
  dout(10) << "collection_set_bits " << fn << " " << bits << dendl;
  char n[PATH_MAX];
  int r;
  int32_t v = bits;
  int fd = ::open(fn, O_RDONLY);
  if (fd < 0) {
    // capture errno immediately, before any other call can clobber it
    r = -errno;
  } else {
    get_attrname("bits", n, PATH_MAX);
    r = chain_fsetxattr(fd, n, (char*)&v, sizeof(v));
    VOID_TEMP_FAILURE_RETRY(::close(fd));
  }
  // label matches the entry log line (it used to say "collection_setattr")
  dout(10) << "collection_set_bits " << fn << " " << bits << " = " << r << dendl;
  return r;
}
/**
 * Read back the bit width recorded for collection `c` in its "bits" xattr.
 *
 * @param c  collection to query
 * @return the stored bit width on success; a negative error code if the
 *         path from get_cdir() cannot be opened (-errno) or the xattr
 *         cannot be read (the chain_fgetxattr() error)
 */
int FileStore::collection_bits(const coll_t& c)
{
  char fn[PATH_MAX];
  get_cdir(c, fn, sizeof(fn));
  dout(15) << "collection_bits " << fn << dendl;
  char n[PATH_MAX];
  int32_t bits = 0;
  int fd = ::open(fn, O_RDONLY);
  if (fd < 0) {
    // Report the open failure to the caller.  Previously the error was
    // stored in a separate variable and an uninitialized 'bits' was
    // logged and returned.
    bits = -errno;
  } else {
    get_attrname("bits", n, PATH_MAX);
    int r = chain_fgetxattr(fd, n, (char*)&bits, sizeof(bits));
    VOID_TEMP_FAILURE_RETRY(::close(fd));
    if (r < 0) {
      // xattr missing or unreadable: the error code becomes the result
      bits = r;
    }
  }
  dout(10) << "collection_bits " << fn << " = " << bits << dendl;
  return bits;
}
int FileStore::collection_list(const coll_t& c,
const ghobject_t& orig_start,
const ghobject_t& end,
@ -5082,6 +5137,7 @@ int FileStore::_collection_hint_expected_num_objs(const coll_t& c, uint32_t pg_n
int FileStore::_create_collection(
const coll_t& c,
int bits,
const SequencerPosition &spos)
{
char fn[PATH_MAX];
@ -5099,11 +5155,13 @@ int FileStore::_create_collection(
r = init_index(c);
if (r < 0)
return r;
r = _collection_set_bits(c, bits);
if (r < 0)
return r;
// create parallel temp collection, too
if (!c.is_meta() && !c.is_temp()) {
coll_t temp = c.get_temp();
r = _create_collection(temp, spos);
r = _create_collection(temp, 0, spos);
if (r < 0)
return r;
}
@ -5491,6 +5549,7 @@ int FileStore::_split_collection(const coll_t& cid,
_close_replay_guard(cid, spos);
_close_replay_guard(dest, spos);
}
_collection_set_bits(cid, bits);
if (!r && cct->_conf->filestore_debug_verify_split) {
vector<ghobject_t> objects;
ghobject_t next;

View File

@ -650,8 +650,11 @@ public:
int _collection_remove_recursive(const coll_t &cid,
const SequencerPosition &spos);
int _collection_set_bits(const coll_t& cid, int bits);
// collections
using ObjectStore::collection_list;
int collection_bits(const coll_t& c) override;
int collection_list(const coll_t& c,
const ghobject_t& start, const ghobject_t& end, int max,
vector<ghobject_t> *ls, ghobject_t *next) override;
@ -682,7 +685,8 @@ public:
using ObjectStore::get_omap_iterator;
ObjectMap::ObjectMapIterator get_omap_iterator(const coll_t& c, const ghobject_t &oid) override;
int _create_collection(const coll_t& c, const SequencerPosition &spos);
int _create_collection(const coll_t& c, int bits,
const SequencerPosition &spos);
int _destroy_collection(const coll_t& c);
/**
* Give an expected number of objects hint to the collection.

View File

@ -1400,6 +1400,18 @@ int KStore::collection_empty(const coll_t& cid, bool *empty)
return 0;
}
int KStore::collection_bits(const coll_t& cid)
{
dout(15) << __func__ << " " << cid << dendl;
CollectionHandle ch = _get_collection(cid);
if (!ch)
return -ENOENT;
Collection *c = static_cast<Collection*>(ch.get());
RWLock::RLocker l(c->lock);
dout(10) << __func__ << " " << cid << " = " << c->cnode.bits << dendl;
return c->cnode.bits;
}
int KStore::collection_list(
const coll_t& cid, const ghobject_t& start, const ghobject_t& end, int max,
vector<ghobject_t> *ls, ghobject_t *pnext)

View File

@ -480,7 +480,7 @@ public:
int list_collections(vector<coll_t>& ls) override;
bool collection_exists(const coll_t& c) override;
int collection_empty(const coll_t& c, bool *empty) override;
int collection_bits(const coll_t& c) override;
int collection_list(
const coll_t& cid, const ghobject_t& start, const ghobject_t& end,
int max,

View File

@ -460,6 +460,16 @@ int MemStore::collection_empty(const coll_t& cid, bool *empty)
return 0;
}
int MemStore::collection_bits(const coll_t& cid)
{
dout(10) << __func__ << " " << cid << dendl;
CollectionRef c = get_collection(cid);
if (!c)
return -ENOENT;
RWLock::RLocker l(c->lock);
return c->bits;
}
int MemStore::collection_list(const coll_t& cid,
const ghobject_t& start,
const ghobject_t& end,
@ -854,7 +864,7 @@ void MemStore::_do_transaction(Transaction& t)
case Transaction::OP_MKCOLL:
{
coll_t cid = i.get_cid(op->cid);
r = _create_collection(cid);
r = _create_collection(cid, op->split_bits);
}
break;
@ -1345,7 +1355,7 @@ int MemStore::_omap_setheader(const coll_t& cid, const ghobject_t &oid,
return 0;
}
int MemStore::_create_collection(const coll_t& cid)
int MemStore::_create_collection(const coll_t& cid, int bits)
{
dout(10) << __func__ << " " << cid << dendl;
RWLock::WLocker l(coll_lock);
@ -1353,6 +1363,7 @@ int MemStore::_create_collection(const coll_t& cid)
if (!result.second)
return -EEXIST;
result.first->second.reset(new Collection(cct, cid));
result.first->second->bits = bits;
return 0;
}
@ -1458,6 +1469,9 @@ int MemStore::_split_collection(const coll_t& cid, uint32_t bits, uint32_t match
}
}
sc->bits = bits;
assert(dc->bits == (int)bits);
return 0;
}
namespace {

View File

@ -96,6 +96,7 @@ public:
struct PageSetObject;
struct Collection : public CollectionImpl {
coll_t cid;
int bits;
CephContext *cct;
bool use_page_set;
ceph::unordered_map<ghobject_t, ObjectRef> object_hash; ///< for lookup
@ -223,7 +224,7 @@ private:
int _collection_hint_expected_num_objs(const coll_t& cid, uint32_t pg_num,
uint64_t num_objs) const { return 0; }
int _create_collection(const coll_t& c);
int _create_collection(const coll_t& c, int bits);
int _destroy_collection(const coll_t& c);
int _collection_add(const coll_t& cid, const coll_t& ocid, const ghobject_t& oid);
int _collection_move_rename(const coll_t& oldcid, const ghobject_t& oldoid,
@ -325,6 +326,7 @@ public:
}
bool collection_exists(const coll_t& c) override;
int collection_empty(const coll_t& c, bool *empty) override;
int collection_bits(const coll_t& c) override;
using ObjectStore::collection_list;
int collection_list(const coll_t& cid,
const ghobject_t& start, const ghobject_t& end, int max,

View File

@ -5850,6 +5850,21 @@ void PG::update_store_with_options()
if(r < 0 && r != -EOPNOTSUPP) {
derr << __func__ << "set_collection_opts returns error:" << r << dendl;
}
if (osd->store->get_type() == "filestore") {
// legacy filestore didn't store collection bit width; fix.
int bits = osd->store->collection_bits(coll);
if (bits < 0) {
if (coll.is_meta())
bits = 0;
else
bits = info.pgid.get_split_bits(pool.info.get_pg_num());
lderr(cct) << __func__ << " setting bit width to " << bits << dendl;
ObjectStore::Transaction t;
t.collection_set_bits(coll, bits);
osd->store->apply_transaction(osr.get(), std::move(t));
}
}
}
std::ostream& operator<<(std::ostream& oss,

View File

@ -27,6 +27,7 @@ add_ceph_test(osd-scrub-repair.sh ${CMAKE_CURRENT_SOURCE_DIR}/osd-scrub-repair.s
add_ceph_test(osd-scrub-snaps.sh ${CMAKE_CURRENT_SOURCE_DIR}/osd-scrub-snaps.sh)
add_ceph_test(osd-copy-from.sh ${CMAKE_CURRENT_SOURCE_DIR}/osd-copy-from.sh)
add_ceph_test(osd-fast-mark-down.sh ${CMAKE_CURRENT_SOURCE_DIR}/osd-fast-mark-down.sh)
add_ceph_test(osd-dup.sh ${CMAKE_CURRENT_SOURCE_DIR}/osd-dup.sh)
# unittest_osdmap
add_executable(unittest_osdmap

72
src/test/osd/osd-dup.sh Executable file
View File

@ -0,0 +1,72 @@
#!/bin/bash
source $(dirname $0)/../detect-build-env-vars.sh
source $CEPH_ROOT/qa/workunits/ceph-helpers.sh
# Test driver: set up the cluster environment, then run each requested
# TEST_* function between setup and teardown.
#
#   $1    working directory for the test cluster
#   $2..  optional list of test function names; when omitted, every
#         function named TEST_* defined in this shell is run
#
# Returns 1 as soon as setup, a test function, or teardown fails.
function run() {
local dir=$1
shift
export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one
export CEPH_ARGS
# each fragment ends with a space so later appends stay separate words
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--enable-experimental-unrecoverable-data-corrupting-features bluestore "
# default to all TEST_* functions found in the output of 'set'
local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
for func in $funcs ; do
setup $dir || return 1
$func $dir || return 1
teardown $dir || return 1
done
}
# Convert osd.0 offline to bluestore with 'ceph-objectstore-tool --op dup'
# and check that the cluster comes back clean.
#
#   $1  working directory for the test cluster
#
# Steps: start a mon and three OSDs, write objects, stop osd.0, move its
# data directory aside, mkfs a bluestore with the same fsid, dup the old
# store into it, restart osd.0 and verify it reports bluestore and that
# all PGs are active+clean.
function TEST_filestore_to_bluestore() {
    local dir=$1
    # keep scratch variables out of the global namespace
    local osd_pid ofsid O

    run_mon $dir a || return 1
    run_osd $dir 0 || return 1
    osd_pid=$(cat $dir/osd.0.pid)
    run_osd $dir 1 || return 1
    run_osd $dir 2 || return 1
    sleep 5
    # fail early like every other setup step if the pool cannot be created
    ceph osd pool create foo 16 || return 1
    # write some objects
    rados bench -p foo 10 write -b 4096 --no-cleanup || return 1
    # kill
    while kill $osd_pid; do sleep 1 ; done
    ceph osd down 0
    mv $dir/0 $dir/0.old || return 1
    mkdir $dir/0 || return 1
    ofsid=$(cat $dir/0.old/fsid)
    echo "osd fsid $ofsid"
    # temporarily add tool logging options; restored below
    O=$CEPH_ARGS
    CEPH_ARGS+="--log-file $dir/cot.log --log-max-recent 0 "
    ceph-objectstore-tool --type bluestore --data-path $dir/0 --fsid $ofsid \
        --op mkfs || return 1
    ceph-objectstore-tool --data-path $dir/0.old --target-data-path $dir/0 \
        --op dup || return 1
    CEPH_ARGS=$O
    run_osd $dir 0 || return 1
    while ! ceph osd stat | grep '3 up' ; do sleep 1 ; done
    ceph osd metadata 0 | grep bluestore || return 1
    ceph osd scrub 0
    # give it some time
    sleep 15
    ceph -s | grep '20 active+clean' || return 1
}
main osd-dup "$@"
# Local Variables:
# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh"
# End:

View File

@ -2184,6 +2184,166 @@ int remove_clone(ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cl
return 0;
}
/**
 * Copy the complete contents of one object store into another, empty one.
 *
 * Both stores are mounted here and unmounted before returning.  For every
 * collection in src the collection is created on dst with the same bit
 * width, then each object's xattrs, data, omap header and omap keys are
 * copied, one transaction per object.  Afterwards the keyring file and the
 * "magic", "whoami", "ceph_fsid" and "fsid" meta keys are copied and dst
 * is marked "ready".
 *
 * @param srcpath  data path of the source store (used to find the keyring)
 * @param src      source store, not yet mounted
 * @param dstpath  data path of the destination store
 * @param dst      destination store, not yet mounted; must have the same
 *                 fsid as src and contain no collections
 * @return 0 on success, a negative error code on any failure.  A failure
 *         never results in the "ready" marker being written.
 */
int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
{
  cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
       << " to " << dst->get_type() << ": " << dstpath
       << std::endl;
  ObjectStore::Sequencer osr("dup");
  int num, i;
  vector<coll_t> collections;
  int r;

  r = src->mount();
  if (r < 0) {
    cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
    return r;
  }
  r = dst->mount();
  if (r < 0) {
    cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
    goto out_src;
  }

  if (src->get_fsid() != dst->get_fsid()) {
    cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
         << std::endl;
    // r is still 0 from the successful mount; make this a real failure
    r = -EINVAL;
    goto out;
  }
  cout << "fsid " << src->get_fsid() << std::endl;

  // make sure dst is empty
  r = dst->list_collections(collections);
  if (r < 0) {
    cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
    goto out;
  }
  if (!collections.empty()) {
    cerr << "destination store is not empty" << std::endl;
    // list_collections succeeded, so r must be set explicitly
    r = -ENOTEMPTY;
    goto out;
  }

  r = src->list_collections(collections);
  if (r < 0) {
    cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
    goto out;
  }

  num = collections.size();
  cout << num << " collections" << std::endl;
  i = 1;
  for (auto cid : collections) {
    cout << i++ << "/" << num << " " << cid << std::endl;
    {
      ObjectStore::Transaction t;
      int bits = src->collection_bits(cid);
      if (bits < 0) {
        cerr << "cannot get bit count for collection " << cid << ": "
             << cpp_strerror(bits) << std::endl;
        // propagate the error instead of returning the stale r
        r = bits;
        goto out;
      }
      t.create_collection(cid, bits);
      r = dst->apply_transaction(&osr, std::move(t));
      if (r < 0) {
        cerr << "failed to create collection " << cid << " on dst: "
             << cpp_strerror(r) << std::endl;
        goto out;
      }
    }

    ghobject_t pos;
    uint64_t n = 0;
    uint64_t bytes = 0, keys = 0;
    while (true) {
      vector<ghobject_t> ls;
      r = src->collection_list(cid, pos, ghobject_t::get_max(), 1000, &ls, &pos);
      if (r < 0) {
        cerr << "collection_list on " << cid << " from " << pos << " got: "
             << cpp_strerror(r) << std::endl;
        goto out;
      }
      if (ls.empty()) {
        break;
      }

      for (auto& oid : ls) {
        //cout << " " << cid << " " << oid << std::endl;
        if (n % 100 == 0) {
          // progress line, rewritten in place via '\r'
          cout << " " << std::setw(16) << n << " objects, "
               << std::setw(16) << bytes << " bytes, "
               << std::setw(16) << keys << " keys"
               << std::setw(1) << "\r" << std::flush;
        }
        n++;

        ObjectStore::Transaction t;
        t.touch(cid, oid);

        // A failed read from src must abort the copy; otherwise the
        // object would be silently duplicated without its attrs, data
        // or omap and dst would still be marked ready.
        map<string,bufferptr> attrs;
        r = src->getattrs(cid, oid, attrs);
        if (r < 0) {
          cerr << "getattrs on " << cid << " " << oid << " got: "
               << cpp_strerror(r) << std::endl;
          goto out;
        }
        if (!attrs.empty()) {
          t.setattrs(cid, oid, attrs);
        }

        bufferlist bl;
        // offset 0, length 0: same whole-object read request as before
        r = src->read(cid, oid, 0, 0, bl);
        if (r < 0) {
          cerr << "read on " << cid << " " << oid << " got: "
               << cpp_strerror(r) << std::endl;
          goto out;
        }
        if (bl.length()) {
          t.write(cid, oid, 0, bl.length(), bl);
          bytes += bl.length();
        }

        bufferlist header;
        map<string,bufferlist> omap;
        r = src->omap_get(cid, oid, &header, &omap);
        if (r < 0) {
          cerr << "omap_get on " << cid << " " << oid << " got: "
               << cpp_strerror(r) << std::endl;
          goto out;
        }
        if (header.length()) {
          t.omap_setheader(cid, oid, header);
          ++keys;
        }
        if (!omap.empty()) {
          keys += omap.size();
          t.omap_setkeys(cid, oid, omap);
        }

        r = dst->apply_transaction(&osr, std::move(t));
        if (r < 0) {
          cerr << "failed to write " << cid << " " << oid << " to dst: "
               << cpp_strerror(r) << std::endl;
          goto out;
        }
      }
    }
    cout << " " << std::setw(16) << n << " objects, "
         << std::setw(16) << bytes << " bytes, "
         << std::setw(16) << keys << " keys"
         << std::setw(1) << std::endl;
  }

  // keyring
  cout << "keyring" << std::endl;
  {
    bufferlist bl;
    string s = srcpath + "/keyring";
    string err;
    r = bl.read_file(s.c_str(), &err);
    if (r < 0) {
      // best effort: a missing keyring is reported but not fatal
      cerr << "failed to copy " << s << ": " << err << std::endl;
    } else {
      string d = dstpath + "/keyring";
      int wr = bl.write_file(d.c_str(), 0600);
      if (wr < 0) {
        cerr << "failed to write " << d << ": " << cpp_strerror(wr)
             << std::endl;
      }
    }
  }

  // osd metadata
  cout << "duping osd metadata" << std::endl;
  {
    for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
      string val;
      r = src->read_meta(k, &val);
      if (r < 0) {
        cerr << "failed to read meta '" << k << "' from src: "
             << cpp_strerror(r) << std::endl;
        goto out;
      }
      r = dst->write_meta(k, val);
      if (r < 0) {
        cerr << "failed to write meta '" << k << "' to dst: "
             << cpp_strerror(r) << std::endl;
        goto out;
      }
    }
  }

  // only mark dst usable once everything above has been copied
  r = dst->write_meta("ready", "ready");
  if (r < 0) {
    cerr << "failed to mark dst ready: " << cpp_strerror(r) << std::endl;
    goto out;
  }

  cout << "done." << std::endl;
  r = 0;
 out:
  dst->umount();
 out_src:
  src->umount();
  return r;
}
void usage(po::options_description &desc)
{
cerr << std::endl;
@ -2301,6 +2461,7 @@ int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
int main(int argc, char **argv)
{
string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
string target_data_path, fsid;
string objcmd, arg1, arg2, type, format, argnspace, pool;
boost::optional<std::string> nspace;
spg_t pgid;
@ -2315,7 +2476,7 @@ int main(int argc, char **argv)
desc.add_options()
("help", "produce help message")
("type", po::value<string>(&type),
"Arg is one of [filestore (default), memstore]")
"Arg is one of [bluestore, filestore (default), memstore]")
("data-path", po::value<string>(&dpath),
"path to object store, mandatory")
("journal-path", po::value<string>(&jpath),
@ -2325,7 +2486,7 @@ int main(int argc, char **argv)
("pool", po::value<string>(&pool),
"Pool name, mandatory for apply-layout-settings if --pgid is not specified")
("op", po::value<string>(&op),
"Arg is one of [info, log, remove, mkfs, fsck, fuse, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
"Arg is one of [info, log, remove, mkfs, fsck, fuse, dup, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, "
"get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, apply-layout-settings, update-mon-db]")
("epoch", po::value<unsigned>(&epoch),
"epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
@ -2333,6 +2494,10 @@ int main(int argc, char **argv)
"path of file to export, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
("mon-store-path", po::value<string>(&mon_store_path),
"path of monstore to update-mon-db")
("fsid", po::value<string>(&fsid),
"fsid for new store created by mkfs")
("target-data-path", po::value<string>(&target_data_path),
"path of target object store (for --op dup)")
("mountpoint", po::value<string>(&mountpoint),
"fuse mountpoint")
("format", po::value<string>(&format)->default_value("json-pretty"),
@ -2580,6 +2745,15 @@ int main(int argc, char **argv)
return 0;
}
if (op == "mkfs") {
if (fsid.length()) {
uuid_d f;
bool r = f.parse(fsid.c_str());
if (!r) {
cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
return 1;
}
fs->set_fsid(f);
}
int r = fs->mkfs();
if (r < 0) {
cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
@ -2587,6 +2761,35 @@ int main(int argc, char **argv)
}
return 0;
}
// --op dup: copy the store at --data-path into the (already mkfs'ed)
// store at --target-data-path.  The target's backend type is taken from
// the "type" file inside the target directory.
if (op == "dup") {
  if (target_data_path.empty()) {
    // without this check we would try to open "/type"
    cerr << "Must provide --target-data-path for --op dup" << std::endl;
    return 1;
  }
  string target_type;
  char fn[PATH_MAX];
  snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
  int fd = ::open(fn, O_RDONLY);
  if (fd < 0) {
    cerr << "Unable to open " << target_data_path << "/type" << std::endl;
    exit(1);
  }
  bufferlist bl;
  bl.read_fd(fd, 64);
  ::close(fd);
  if (bl.length()) {
    target_type = string(bl.c_str(), bl.length());
    // Strip trailing newline/whitespace only if present, rather than
    // unconditionally dropping the last byte of the type name.
    size_t last = target_type.find_last_not_of(" \t\r\n");
    target_type.erase(last == string::npos ? 0 : last + 1);
  }
  ObjectStore *targetfs = ObjectStore::create(
    g_ceph_context, target_type,
    target_data_path, "", 0);
  if (targetfs == NULL) {
    cerr << "Unable to open store of type " << target_type << std::endl;
    return 1;
  }
  int r = dup(dpath, fs, target_data_path, targetfs);
  if (r < 0) {
    cerr << "dup failed: " << cpp_strerror(r) << std::endl;
    return 1;
  }
  return 0;
}
ObjectStore::Sequencer *osr = new ObjectStore::Sequencer(__func__);
int ret = fs->mount();