common,osd: add hash algorithms for dedup (sha256, sha512)

Signed-off-by: Myoungwon Oh <omwmw@sk.com>
This commit is contained in:
myoungwon oh 2019-05-27 16:41:15 +09:00
parent f1ea02970b
commit f745a7d35f
7 changed files with 98 additions and 27 deletions

View File

@ -2043,6 +2043,8 @@ void buffer::list::invalidate_crc()
#include "common/ceph_crypto.h"
using ceph::crypto::SHA1;
using ceph::crypto::SHA256;
using ceph::crypto::SHA512;
sha1_digest_t buffer::list::sha1()
{
@ -2055,6 +2057,28 @@ sha1_digest_t buffer::list::sha1()
return sha1_digest_t(fingerprint);
}
/**
 * Compute the SHA-256 digest of this list's contents.
 *
 * Every entry of _buffers is fed to the hash in iteration order, so the
 * result covers the concatenation of all segments.
 */
sha256_digest_t buffer::list::sha256()
{
  SHA256 hasher;
  for (auto& segment : _buffers) {
    // The digest API consumes unsigned bytes; the segment exposes char*.
    const auto* bytes =
      reinterpret_cast<const unsigned char*>(segment.c_str());
    hasher.Update(bytes, segment.length());
  }

  unsigned char digest[CEPH_CRYPTO_SHA256_DIGESTSIZE];
  hasher.Final(digest);
  return sha256_digest_t(digest);
}
/**
 * Compute the SHA-512 digest of this list's contents.
 *
 * Every entry of _buffers is fed to the hash in iteration order, so the
 * result covers the concatenation of all segments.
 */
sha512_digest_t buffer::list::sha512()
{
  SHA512 hasher;
  for (auto& segment : _buffers) {
    // The digest API consumes unsigned bytes; the segment exposes char*.
    const auto* bytes =
      reinterpret_cast<const unsigned char*>(segment.c_str());
    hasher.Update(bytes, segment.length());
  }

  unsigned char digest[CEPH_CRYPTO_SHA512_DIGESTSIZE];
  hasher.Final(digest);
  return sha512_digest_t(digest);
}
/**
* Binary write all contents to a C++ stream
*/

View File

@ -10,6 +10,7 @@
#define CEPH_CRYPTO_SHA1_DIGESTSIZE 20
#define CEPH_CRYPTO_HMACSHA256_DIGESTSIZE 32
#define CEPH_CRYPTO_SHA256_DIGESTSIZE 32
#define CEPH_CRYPTO_SHA512_DIGESTSIZE 64
#ifdef USE_NSS
// you *must* use CRYPTO_CXXFLAGS in CMakeLists.txt for including this include
@ -33,6 +34,7 @@ extern "C" {
const EVP_MD *EVP_md5(void);
const EVP_MD *EVP_sha1(void);
const EVP_MD *EVP_sha256(void);
const EVP_MD *EVP_sha512(void);
}
#endif /*USE_OPENSSL*/
@ -115,6 +117,11 @@ namespace ceph {
public:
SHA256 () : NSSDigest(SEC_OID_SHA256, CEPH_CRYPTO_SHA256_DIGESTSIZE) { }
};
// SHA-512 digest for the NSS backend: selects the SEC_OID_SHA512 algorithm
// and passes the 64-byte digest size to the NSSDigest base.
class SHA512 : public NSSDigest {
public:
  SHA512()
    : NSSDigest(SEC_OID_SHA512, CEPH_CRYPTO_SHA512_DIGESTSIZE)
  {}
};
}
}
}
@ -150,6 +157,11 @@ namespace ceph {
public:
SHA256 () : OpenSSLDigest(EVP_sha256()) { }
};
// SHA-512 digest for the OpenSSL backend: hands the EVP_sha512() message
// digest descriptor to the OpenSSLDigest base.
class SHA512 : public OpenSSLDigest {
public:
  SHA512()
    : OpenSSLDigest(EVP_sha512())
  {}
};
}
}
}
@ -337,6 +349,7 @@ namespace ceph {
using ceph::crypto::ssl::SHA256;
using ceph::crypto::ssl::MD5;
using ceph::crypto::ssl::SHA1;
using ceph::crypto::ssl::SHA512;
using ceph::crypto::ssl::HMACSHA256;
using ceph::crypto::ssl::HMACSHA1;
@ -348,6 +361,7 @@ namespace ceph {
using ceph::crypto::nss::SHA256;
using ceph::crypto::nss::MD5;
using ceph::crypto::nss::SHA1;
using ceph::crypto::nss::SHA512;
using ceph::crypto::nss::HMACSHA256;
using ceph::crypto::nss::HMACSHA1;

View File

@ -76,6 +76,8 @@ class deleter;
template<uint8_t S>
struct sha_digest_t;
using sha1_digest_t = sha_digest_t<20>;
using sha256_digest_t = sha_digest_t<32>;
using sha512_digest_t = sha_digest_t<64>;
template<typename T> class DencDumper;
@ -1233,6 +1235,8 @@ inline namespace v14_2_0 {
uint32_t crc32c(uint32_t crc) const;
void invalidate_crc();
sha1_digest_t sha1();
sha256_digest_t sha256();
sha512_digest_t sha512();
// These functions return a bufferlist with a pointer to a single
// static buffer. They /must/ not outlive the memory they

View File

@ -623,5 +623,8 @@ WRITE_CLASS_ENCODER(sha1_digest_t)
using sha256_digest_t = sha_digest_t<32>;
WRITE_CLASS_ENCODER(sha256_digest_t)
// SHA-512 digests are 64 bytes wide (CEPH_CRYPTO_SHA512_DIGESTSIZE).
// buffer::list::sha512() returns this type, so it needs the same alias and
// encoder registration that the sha1/sha256 digests have.
using sha512_digest_t = sha_digest_t<64>;
WRITE_CLASS_ENCODER(sha512_digest_t)
#endif

View File

@ -2472,41 +2472,52 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
pg_pool_t::fingerprint_t fp_algo_t = pool.info.get_fingerprint_type();
if (iter->second.has_reference() &&
fp_algo_t != pg_pool_t::TYPE_FINGERPRINT_NONE) {
object_t fp_oid;
bufferlist in;
switch (fp_algo_t) {
case pg_pool_t::TYPE_FINGERPRINT_SHA1:
{
sha1_digest_t sha1r = chunk_data.sha1();
object_t fp_oid = sha1r.to_str();
bufferlist in;
if (fp_oid != tgt_soid.oid) {
// decrement old chunk's reference count
ObjectOperation dec_op;
cls_chunk_refcount_put_op put_call;
::encode(put_call, in);
dec_op.call("refcount", "chunk_put", in);
// we don't care dec_op's completion. scrub for dedup will fix this.
tid = osd->objecter->mutate(
tgt_soid.oid, oloc, dec_op, snapc,
ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
flags, NULL);
in.clear();
}
tgt_soid.oid = fp_oid;
iter->second.oid = tgt_soid;
// add data op
ceph_osd_op osd_op;
osd_op.extent.offset = 0;
osd_op.extent.length = chunk_data.length();
encode(osd_op, in);
encode(soid, in);
in.append(chunk_data);
obj_op.call("cas", "cas_write_or_get", in);
fp_oid = sha1r.to_str();
break;
}
case pg_pool_t::TYPE_FINGERPRINT_SHA256:
  {
    // Name the target chunk object after the SHA-256 of the chunk data.
    sha256_digest_t sha256r = chunk_data.sha256();
    fp_oid = sha256r.to_str();
    // Must not fall through: the next arm would overwrite fp_oid with the
    // SHA-512 value and then reach the default arm's assert.
    break;
  }
case pg_pool_t::TYPE_FINGERPRINT_SHA512:
  {
    // Name the target chunk object after the SHA-512 of the chunk data.
    sha512_digest_t sha512r = chunk_data.sha512();
    fp_oid = sha512r.to_str();
    // Must not fall through into the "unrecognized fingerprint type" assert.
    break;
  }
default:
assert(0 == "unrecognized fingerprint type");
break;
}
if (fp_oid != tgt_soid.oid) {
// decrement old chunk's reference count
ObjectOperation dec_op;
cls_chunk_refcount_put_op put_call;
::encode(put_call, in);
dec_op.call("refcount", "chunk_put", in);
// we don't care dec_op's completion. scrub for dedup will fix this.
tid = osd->objecter->mutate(
tgt_soid.oid, oloc, dec_op, snapc,
ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
flags, NULL);
in.clear();
}
tgt_soid.oid = fp_oid;
iter->second.oid = tgt_soid;
// add data op
ceph_osd_op osd_op;
osd_op.extent.offset = 0;
osd_op.extent.length = chunk_data.length();
encode(osd_op, in);
encode(soid, in);
in.append(chunk_data);
obj_op.call("cas", "cas_write_or_get", in);
} else {
obj_op.add_data(CEPH_OSD_OP_WRITE, tgt_offset, tgt_length, chunk_data);
}

View File

@ -1455,12 +1455,18 @@ public:
// Fingerprint (content hash) algorithm selected for a pool.
// The string forms accepted by get_fingerprint_from_str() are
// "none", "sha1", "sha256" and "sha512".
// NOTE(review): the explicit numeric values are presumably persisted or
// sent over the wire -- confirm before renumbering.
typedef enum {
// no fingerprinting
TYPE_FINGERPRINT_NONE = 0,
// SHA-1
TYPE_FINGERPRINT_SHA1 = 1,
// SHA-256
TYPE_FINGERPRINT_SHA256 = 2,
// SHA-512
TYPE_FINGERPRINT_SHA512 = 3,
} fingerprint_t;
// Translate a fingerprint algorithm name into its fingerprint_t value.
//
// Recognized names are "none", "sha1", "sha256" and "sha512" (exact,
// case-sensitive match). Any other string yields (fingerprint_t)-1, which
// callers must treat as "invalid".
static fingerprint_t get_fingerprint_from_str(const std::string& s) {
  const struct {
    const char* name;
    fingerprint_t type;
  } known_algorithms[] = {
    { "none",   TYPE_FINGERPRINT_NONE },
    { "sha1",   TYPE_FINGERPRINT_SHA1 },
    { "sha256", TYPE_FINGERPRINT_SHA256 },
    { "sha512", TYPE_FINGERPRINT_SHA512 },
  };

  for (const auto& candidate : known_algorithms) {
    if (s == candidate.name) {
      return candidate.type;
    }
  }
  // No match: hand back the out-of-band sentinel.
  return static_cast<fingerprint_t>(-1);
}
const fingerprint_t get_fingerprint_type() const {
@ -1479,6 +1485,8 @@ public:
switch (m) {
case TYPE_FINGERPRINT_NONE: return "none";
case TYPE_FINGERPRINT_SHA1: return "sha1";
case TYPE_FINGERPRINT_SHA256: return "sha256";
case TYPE_FINGERPRINT_SHA512: return "sha512";
default: return "unknown";
}
}

View File

@ -59,7 +59,7 @@ void usage()
cout << " --object <object_name> " << std::endl;
cout << " --chunk-size <size> chunk-size (byte) " << std::endl;
cout << " --chunk-algorithm <fixed|rabin> " << std::endl;
cout << " --fingerprint-algorithm <sha1> " << std::endl;
cout << " --fingerprint-algorithm <sha1|sha256|sha512> " << std::endl;
cout << " --chunk-pool <pool name> " << std::endl;
cout << " --max-thread <threads> " << std::endl;
cout << " --report-perioid <seconds> " << std::endl;
@ -371,6 +371,12 @@ void EstimateDedupRatio::add_chunk_fp_to_stat(bufferlist &chunk)
if (fp_algo == "sha1") {
sha1_digest_t sha1_val = chunk.sha1();
fp = sha1_val.to_str();
} else if (fp_algo == "sha256") {
sha256_digest_t sha256_val = chunk.sha256();
fp = sha256_val.to_str();
} else if (fp_algo == "sha512") {
sha512_digest_t sha512_val = chunk.sha512();
fp = sha512_val.to_str();
} else if (chunk_algo == "rabin") {
uint64_t hash = rabin.gen_rabin_hash(chunk.c_str(), 0, chunk.length());
fp = to_string(hash);
@ -569,7 +575,8 @@ int estimate_dedup_ratio(const std::map < std::string, std::string > &opts,
i = opts.find("fingerprint-algorithm");
if (i != opts.end()) {
fp_algo = i->second.c_str();
if (fp_algo != "sha1" && fp_algo != "rabin") {
if (fp_algo != "sha1" && fp_algo != "rabin"
&& fp_algo != "sha256" && fp_algo != "sha512") {
usage_exit();
}
} else {