mirror of
https://github.com/ceph/ceph
synced 2024-12-28 06:23:08 +00:00
Merge pull request #43686 from myoungwon/wip-ceph-dedup-tool-make-dedup-obj
tool: add chunk/object-dedup command Reviewed-by: Samuel Just <sjust@redhat.com>
This commit is contained in:
commit
7c326866e8
@ -242,9 +242,74 @@ function test_dedup_chunk_repair()
|
||||
$RADOS_TOOL -p $POOL rm bar
|
||||
}
|
||||
|
||||
# Exercise the chunk-dedup and object-dedup ops of $DEDUP_TOOL.
#
# For each op: store an object in $POOL, deduplicate it into a fresh chunk
# pool, then check that
#   (a) the chunk object named after the SHA-1 of the data lists the source
#       object among its references (dump-chunk-refs), and
#   (b) the chunk object's content hashes back to its own name.
# On any mismatch both pools are deleted and the test aborts via die.
function test_dedup_object()
{
  CHUNK_POOL=dedup_chunk_pool
  run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8

  # foo holds 11 bytes (echo appends a newline); only the first 10 bytes are
  # deduplicated below, hence "echo -n" when the expected CHUNK_OID is computed.
  echo "There hiHI" > foo

  $RADOS_TOOL -p $POOL put foo ./foo

  sleep 2

  $RADOS_TOOL -p $CHUNK_POOL ls

  # --- chunk-dedup: deduplicate bytes [0, 10) of foo ---
  RESULT=$($DEDUP_TOOL --pool $POOL --op chunk-dedup --object foo --chunk-pool $CHUNK_POOL --source-off 0 --source-length 10 --fingerprint-algorithm sha1 )

  CHUNK_OID=$(echo -n "There hiHI" | sha1sum | awk '{print $1}')

  RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep foo)

  if [ -z "$RESULT" ] ; then
    $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it
    $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it
    die "chunk-dedup failed expecting chunk $CHUNK_OID is referenced by foo"
  fi

  $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk
  VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}')
  if [ "$CHUNK_OID" != "$VERIFY" ] ; then
    $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it
    $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it
    die "Comparing failed: chunk content does not match its fingerprint (chunk-dedup)"
  fi

  # --- object-dedup: deduplicate the whole of bar ---
  # bar is 12 bytes, far below the 4096-byte CDC chunk size, so the test
  # expects the entire object to end up as a single chunk.
  echo -n "There hihiHI" > bar

  $RADOS_TOOL -p $POOL put bar ./bar
  RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096)

  CHUNK_OID=$(echo -n "There hihiHI" | sha1sum | awk '{print $1}')

  RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar)
  if [ -z "$RESULT" ] ; then
    $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it
    $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it
    die "object-dedup failed expecting chunk $CHUNK_OID is referenced by bar"
  fi

  $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk
  VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}')
  if [ "$CHUNK_OID" != "$VERIFY" ] ; then
    $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it
    $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it
    die "Comparing failed: chunk content does not match its fingerprint (object-dedup)"
  fi

  # Success: drop the chunk pool and the scratch files/objects. $POOL itself
  # is left in place for the caller to delete.
  $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it

  rm -rf ./foo ./bar ./chunk
  $RADOS_TOOL -p $POOL rm foo
  $RADOS_TOOL -p $POOL rm bar
}
|
||||
|
||||
test_dedup_ratio_fixed
|
||||
test_dedup_chunk_scrub
|
||||
test_dedup_chunk_repair
|
||||
test_dedup_object
|
||||
|
||||
$CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it
|
||||
|
||||
|
@ -142,6 +142,8 @@ void usage()
|
||||
" [--op chunk-put-ref --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n"
|
||||
" [--op chunk-repair --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n"
|
||||
" [--op dump-chunk-refs --chunk-pool POOL --object OID] \n"
|
||||
" [--op chunk-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --source-off OFFSET --source-length LENGTH] \n"
|
||||
" [--op object-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --dedup-cdc-chunk-size CHUNK_SIZE] \n"
|
||||
<< std::endl;
|
||||
cout << "optional arguments: " << std::endl;
|
||||
cout << " --object <object_name> " << std::endl;
|
||||
@ -153,6 +155,9 @@ void usage()
|
||||
cout << " --report-period <seconds> " << std::endl;
|
||||
cout << " --max-seconds <seconds>" << std::endl;
|
||||
cout << " --max-read-size <bytes> " << std::endl;
|
||||
cout << "explanations: " << std::endl;
|
||||
cout << " chunk-dedup performs deduplication using a chunk generated by given source" << std::endl;
|
||||
cout << " offset and length. object-dedup deduplicates the entire object, not a chunk" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -951,6 +956,253 @@ out:
|
||||
return (ret < 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
// Build the JSON text of an "osd pool set" command that sets `var` to `val`
// on `pool`. The three values are inserted verbatim between double quotes;
// no JSON escaping is applied.
string make_pool_str(string pool, string var, string val)
{
  std::string cmd("{\"prefix\": \"osd pool set\",\"pool\":\"");
  cmd += pool;
  cmd += "\",\"var\": \"";
  cmd += var;
  cmd += "\",\"val\": \"";
  cmd += val;
  cmd += "\"}";
  return cmd;
}
|
||||
|
||||
string make_pool_str(string pool, string var, int val)
|
||||
{
|
||||
return make_pool_str(pool, var, stringify(val));
|
||||
}
|
||||
|
||||
int make_dedup_object(const std::map < std::string, std::string > &opts,
|
||||
std::vector<const char*> &nargs)
|
||||
{
|
||||
Rados rados;
|
||||
IoCtx io_ctx, chunk_io_ctx;
|
||||
std::string object_name, chunk_pool_name, op_name, pool_name, fp_algo;
|
||||
int ret;
|
||||
std::map<std::string, std::string>::const_iterator i;
|
||||
|
||||
i = opts.find("op_name");
|
||||
if (i != opts.end()) {
|
||||
op_name = i->second;
|
||||
} else {
|
||||
cerr << "must specify op" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
i = opts.find("pool");
|
||||
if (i != opts.end()) {
|
||||
pool_name = i->second;
|
||||
} else {
|
||||
cerr << "must specify --pool" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
i = opts.find("object");
|
||||
if (i != opts.end()) {
|
||||
object_name = i->second;
|
||||
} else {
|
||||
cerr << "must specify object" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
i = opts.find("chunk-pool");
|
||||
if (i != opts.end()) {
|
||||
chunk_pool_name = i->second;
|
||||
} else {
|
||||
cerr << "must specify --chunk-pool" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
i = opts.find("pgid");
|
||||
boost::optional<pg_t> pgid(i != opts.end(), pg_t());
|
||||
|
||||
ret = rados.init_with_context(g_ceph_context);
|
||||
if (ret < 0) {
|
||||
cerr << "couldn't initialize rados: " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
ret = rados.connect();
|
||||
if (ret) {
|
||||
cerr << "couldn't connect to cluster: " << cpp_strerror(ret) << std::endl;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
ret = rados.ioctx_create(pool_name.c_str(), io_ctx);
|
||||
if (ret < 0) {
|
||||
cerr << "error opening pool "
|
||||
<< chunk_pool_name << ": "
|
||||
<< cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
ret = rados.ioctx_create(chunk_pool_name.c_str(), chunk_io_ctx);
|
||||
if (ret < 0) {
|
||||
cerr << "error opening pool "
|
||||
<< chunk_pool_name << ": "
|
||||
<< cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
i = opts.find("fingerprint-algorithm");
|
||||
if (i != opts.end()) {
|
||||
fp_algo = i->second.c_str();
|
||||
if (fp_algo != "sha1"
|
||||
&& fp_algo != "sha256" && fp_algo != "sha512") {
|
||||
cerr << "unrecognized fingerprint-algorithm " << fp_algo << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (op_name == "chunk-dedup") {
|
||||
uint64_t offset, length;
|
||||
string chunk_object;
|
||||
i = opts.find("source-off");
|
||||
if (i != opts.end()) {
|
||||
if (rados_sistrtoll(i, &offset)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
cerr << "must specify --source-off" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
i = opts.find("source-length");
|
||||
if (i != opts.end()) {
|
||||
if (rados_sistrtoll(i, &length)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
cerr << "must specify --source-off" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
// 1. make a copy from manifest object to chunk object
|
||||
bufferlist bl;
|
||||
ret = io_ctx.read(object_name, bl, length, offset);
|
||||
if (ret < 0) {
|
||||
cerr << " reading object in base pool fails : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
chunk_object = [&fp_algo, &bl]() -> string {
|
||||
if (fp_algo == "sha1") {
|
||||
return ceph::crypto::digest<ceph::crypto::SHA1>(bl).to_str();
|
||||
} else if (fp_algo == "sha256") {
|
||||
return ceph::crypto::digest<ceph::crypto::SHA256>(bl).to_str();
|
||||
} else if (fp_algo == "sha512") {
|
||||
return ceph::crypto::digest<ceph::crypto::SHA512>(bl).to_str();
|
||||
} else {
|
||||
assert(0 == "unrecognized fingerprint type");
|
||||
return {};
|
||||
}
|
||||
}();
|
||||
ret = chunk_io_ctx.write(chunk_object, bl, length, offset);
|
||||
if (ret < 0) {
|
||||
cerr << " writing object in chunk pool fails : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
// 2. call set_chunk
|
||||
ObjectReadOperation op;
|
||||
op.set_chunk(offset, length, chunk_io_ctx, chunk_object, 0,
|
||||
CEPH_OSD_OP_FLAG_WITH_REFERENCE);
|
||||
ret = io_ctx.operate(object_name, &op, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
} else if (op_name == "object-dedup") {
|
||||
unsigned chunk_size;
|
||||
i = opts.find("dedup-cdc-chunk-size");
|
||||
if (i != opts.end()) {
|
||||
if (rados_sistrtoll(i, &chunk_size)) {
|
||||
cerr << "unrecognized dedup_cdc_chunk_size " << chunk_size << std::endl;
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
bufferlist inbl;
|
||||
ret = rados.mon_command(
|
||||
make_pool_str(pool_name, "fingerprint_algorithm", fp_algo),
|
||||
inbl, NULL, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
return ret;
|
||||
}
|
||||
ret = rados.mon_command(
|
||||
make_pool_str(pool_name, "dedup_tier", chunk_pool_name),
|
||||
inbl, NULL, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
return ret;
|
||||
}
|
||||
ret = rados.mon_command(
|
||||
make_pool_str(pool_name, "dedup_chunk_algorithm", "fastcdc"),
|
||||
inbl, NULL, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
return ret;
|
||||
}
|
||||
ret = rados.mon_command(
|
||||
make_pool_str(pool_name, "dedup_cdc_chunk_size", chunk_size),
|
||||
inbl, NULL, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: add a better way to make an object a manifest object.
|
||||
* We're using set_chunk with an incorrect object here simply to make
|
||||
* the object a manifest object, the tier_flush() will remove
|
||||
* it and replace it with the real contents.
|
||||
*/
|
||||
// convert object to manifest object
|
||||
ObjectWriteOperation op;
|
||||
bufferlist temp;
|
||||
temp.append("temp");
|
||||
op.write_full(temp);
|
||||
|
||||
auto gen_r_num = [] () -> string {
|
||||
std::random_device rd;
|
||||
std::mt19937 gen(rd());
|
||||
std::uniform_int_distribution<uint64_t> dist;
|
||||
uint64_t r_num = dist(gen);
|
||||
return to_string(r_num);
|
||||
};
|
||||
string temp_oid = gen_r_num();
|
||||
// create temp chunk object for set-chunk
|
||||
ret = chunk_io_ctx.operate(temp_oid, &op);
|
||||
if (ret == -EEXIST) {
|
||||
// one more try
|
||||
temp_oid = gen_r_num();
|
||||
ret = chunk_io_ctx.operate(temp_oid, &op);
|
||||
}
|
||||
if (ret < 0) {
|
||||
cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
|
||||
// set-chunk to make manifest object
|
||||
ObjectReadOperation chunk_op;
|
||||
chunk_op.set_chunk(0, 4, chunk_io_ctx, temp_oid, 0,
|
||||
CEPH_OSD_OP_FLAG_WITH_REFERENCE);
|
||||
ret = io_ctx.operate(object_name, &chunk_op, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " set_chunk fail : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
|
||||
// tier-flush to perform deduplication
|
||||
ObjectReadOperation flush_op;
|
||||
flush_op.tier_flush();
|
||||
ret = io_ctx.operate(object_name, &flush_op, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " tier_flush fail : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
|
||||
// tier-evict
|
||||
ObjectReadOperation evict_op;
|
||||
evict_op.tier_evict();
|
||||
ret = io_ctx.operate(object_name, &evict_op, NULL);
|
||||
if (ret < 0) {
|
||||
cerr << " tier_evict fail : " << cpp_strerror(ret) << std::endl;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return (ret < 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
auto args = argv_to_vec(argc, argv);
|
||||
@ -1009,6 +1261,14 @@ int main(int argc, const char **argv)
|
||||
opts["min-chunk-size"] = val;
|
||||
} else if (ceph_argparse_witharg(args, i, &val, "--max-chunk-size", (char*)NULL)) {
|
||||
opts["max-chunk-size"] = val;
|
||||
} else if (ceph_argparse_witharg(args, i, &val, "--chunk-object", (char*)NULL)) {
|
||||
opts["chunk-object"] = val;
|
||||
} else if (ceph_argparse_witharg(args, i, &val, "--source-off", (char*)NULL)) {
|
||||
opts["source-off"] = val;
|
||||
} else if (ceph_argparse_witharg(args, i, &val, "--source-length", (char*)NULL)) {
|
||||
opts["source-length"] = val;
|
||||
} else if (ceph_argparse_witharg(args, i, &val, "--dedup-cdc-chunk-size", (char*)NULL)) {
|
||||
opts["dedup-cdc-chunk-size"] = val;
|
||||
} else if (ceph_argparse_flag(args, i, "--debug", (char*)NULL)) {
|
||||
opts["debug"] = "true";
|
||||
} else {
|
||||
@ -1028,6 +1288,19 @@ int main(int argc, const char **argv)
|
||||
op_name == "chunk-repair" ||
|
||||
op_name == "dump-chunk-refs") {
|
||||
return chunk_scrub_common(opts, args);
|
||||
} else if (op_name == "chunk-dedup" ||
|
||||
op_name == "object-dedup") {
|
||||
/*
|
||||
* chunk-dedup:
|
||||
* using a chunk generated by given source,
|
||||
* create a new object in the chunk pool or increase the reference
|
||||
* if the object exists
|
||||
*
|
||||
* object-dedup:
|
||||
* perform deduplication on the entire object, not a chunk.
|
||||
*
|
||||
*/
|
||||
return make_dedup_object(opts, args);
|
||||
} else {
|
||||
cerr << "unrecognized op " << op_name << std::endl;
|
||||
exit(1);
|
||||
|
Loading…
Reference in New Issue
Block a user