mirror of
https://github.com/ceph/ceph
synced 2025-01-20 01:51:34 +00:00
common,osd: add hash algorithms for dedup (sha256, sha512)
Signed-off-by: Myoungwon Oh <omwmw@sk.com>
This commit is contained in:
parent
f1ea02970b
commit
f745a7d35f
@ -2043,6 +2043,8 @@ void buffer::list::invalidate_crc()
|
||||
|
||||
#include "common/ceph_crypto.h"
|
||||
using ceph::crypto::SHA1;
|
||||
using ceph::crypto::SHA256;
|
||||
using ceph::crypto::SHA512;
|
||||
|
||||
sha1_digest_t buffer::list::sha1()
|
||||
{
|
||||
@ -2055,6 +2057,28 @@ sha1_digest_t buffer::list::sha1()
|
||||
return sha1_digest_t(fingerprint);
|
||||
}
|
||||
|
||||
/**
 * Compute a SHA-256 digest over the contents of this buffer list.
 *
 * Every entry of _buffers is fed to the hasher in iteration order, then the
 * hasher is finalized into a CEPH_CRYPTO_SHA256_DIGESTSIZE-byte array.
 *
 * @return the finalized digest wrapped in a sha256_digest_t
 */
sha256_digest_t buffer::list::sha256()
{
  SHA256 hasher;
  for (auto& segment : _buffers) {
    const auto* bytes =
      reinterpret_cast<const unsigned char*>(segment.c_str());
    hasher.Update(bytes, segment.length());
  }

  unsigned char digest[CEPH_CRYPTO_SHA256_DIGESTSIZE];
  hasher.Final(digest);
  return sha256_digest_t(digest);
}
|
||||
|
||||
/**
 * Compute a SHA-512 digest over the contents of this buffer list.
 *
 * Every entry of _buffers is fed to the hasher in iteration order, then the
 * hasher is finalized into a CEPH_CRYPTO_SHA512_DIGESTSIZE-byte array.
 *
 * @return the finalized digest wrapped in a sha512_digest_t
 */
sha512_digest_t buffer::list::sha512()
{
  SHA512 hasher;
  for (auto& segment : _buffers) {
    const auto* bytes =
      reinterpret_cast<const unsigned char*>(segment.c_str());
    hasher.Update(bytes, segment.length());
  }

  unsigned char digest[CEPH_CRYPTO_SHA512_DIGESTSIZE];
  hasher.Final(digest);
  return sha512_digest_t(digest);
}
|
||||
|
||||
/**
|
||||
* Binary write all contents to a C++ stream
|
||||
*/
|
||||
|
@ -10,6 +10,7 @@
|
||||
#define CEPH_CRYPTO_SHA1_DIGESTSIZE 20
|
||||
#define CEPH_CRYPTO_HMACSHA256_DIGESTSIZE 32
|
||||
#define CEPH_CRYPTO_SHA256_DIGESTSIZE 32
|
||||
#define CEPH_CRYPTO_SHA512_DIGESTSIZE 64
|
||||
|
||||
#ifdef USE_NSS
|
||||
// you *must* use CRYPTO_CXXFLAGS in CMakeLists.txt for including this include
|
||||
@ -33,6 +34,7 @@ extern "C" {
|
||||
const EVP_MD *EVP_md5(void);
|
||||
const EVP_MD *EVP_sha1(void);
|
||||
const EVP_MD *EVP_sha256(void);
|
||||
const EVP_MD *EVP_sha512(void);
|
||||
}
|
||||
#endif /*USE_OPENSSL*/
|
||||
|
||||
@ -115,6 +117,11 @@ namespace ceph {
|
||||
public:
|
||||
SHA256 () : NSSDigest(SEC_OID_SHA256, CEPH_CRYPTO_SHA256_DIGESTSIZE) { }
|
||||
};
|
||||
|
||||
class SHA512 : public NSSDigest {
|
||||
public:
|
||||
SHA512 () : NSSDigest(SEC_OID_SHA512, CEPH_CRYPTO_SHA512_DIGESTSIZE) { }
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -150,6 +157,11 @@ namespace ceph {
|
||||
public:
|
||||
SHA256 () : OpenSSLDigest(EVP_sha256()) { }
|
||||
};
|
||||
|
||||
class SHA512 : public OpenSSLDigest {
|
||||
public:
|
||||
SHA512 () : OpenSSLDigest(EVP_sha512()) { }
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -337,6 +349,7 @@ namespace ceph {
|
||||
using ceph::crypto::ssl::SHA256;
|
||||
using ceph::crypto::ssl::MD5;
|
||||
using ceph::crypto::ssl::SHA1;
|
||||
using ceph::crypto::ssl::SHA512;
|
||||
|
||||
using ceph::crypto::ssl::HMACSHA256;
|
||||
using ceph::crypto::ssl::HMACSHA1;
|
||||
@ -348,6 +361,7 @@ namespace ceph {
|
||||
using ceph::crypto::nss::SHA256;
|
||||
using ceph::crypto::nss::MD5;
|
||||
using ceph::crypto::nss::SHA1;
|
||||
using ceph::crypto::nss::SHA512;
|
||||
|
||||
using ceph::crypto::nss::HMACSHA256;
|
||||
using ceph::crypto::nss::HMACSHA1;
|
||||
|
@ -76,6 +76,8 @@ class deleter;
|
||||
template<uint8_t S>
|
||||
struct sha_digest_t;
|
||||
using sha1_digest_t = sha_digest_t<20>;
|
||||
using sha256_digest_t = sha_digest_t<32>;
|
||||
using sha512_digest_t = sha_digest_t<64>;
|
||||
|
||||
template<typename T> class DencDumper;
|
||||
|
||||
@ -1233,6 +1235,8 @@ inline namespace v14_2_0 {
|
||||
uint32_t crc32c(uint32_t crc) const;
|
||||
void invalidate_crc();
|
||||
sha1_digest_t sha1();
|
||||
sha256_digest_t sha256();
|
||||
sha512_digest_t sha512();
|
||||
|
||||
// These functions return a bufferlist with a pointer to a single
|
||||
// static buffer. They /must/ not outlive the memory they
|
||||
|
@ -623,5 +623,8 @@ WRITE_CLASS_ENCODER(sha1_digest_t)
|
||||
using sha256_digest_t = sha_digest_t<32>;
|
||||
WRITE_CLASS_ENCODER(sha256_digest_t)
|
||||
|
||||
using md5_digest_t = sha_digest_t<16>;
|
||||
WRITE_CLASS_ENCODER(md5_digest_t)
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -2472,41 +2472,52 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
|
||||
pg_pool_t::fingerprint_t fp_algo_t = pool.info.get_fingerprint_type();
|
||||
if (iter->second.has_reference() &&
|
||||
fp_algo_t != pg_pool_t::TYPE_FINGERPRINT_NONE) {
|
||||
object_t fp_oid;
|
||||
bufferlist in;
|
||||
switch (fp_algo_t) {
|
||||
case pg_pool_t::TYPE_FINGERPRINT_SHA1:
|
||||
{
|
||||
sha1_digest_t sha1r = chunk_data.sha1();
|
||||
object_t fp_oid = sha1r.to_str();
|
||||
bufferlist in;
|
||||
if (fp_oid != tgt_soid.oid) {
|
||||
// decrement old chunk's reference count
|
||||
ObjectOperation dec_op;
|
||||
cls_chunk_refcount_put_op put_call;
|
||||
::encode(put_call, in);
|
||||
dec_op.call("refcount", "chunk_put", in);
|
||||
// we don't care dec_op's completion. scrub for dedup will fix this.
|
||||
tid = osd->objecter->mutate(
|
||||
tgt_soid.oid, oloc, dec_op, snapc,
|
||||
ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
|
||||
flags, NULL);
|
||||
in.clear();
|
||||
}
|
||||
tgt_soid.oid = fp_oid;
|
||||
iter->second.oid = tgt_soid;
|
||||
// add data op
|
||||
ceph_osd_op osd_op;
|
||||
osd_op.extent.offset = 0;
|
||||
osd_op.extent.length = chunk_data.length();
|
||||
encode(osd_op, in);
|
||||
encode(soid, in);
|
||||
in.append(chunk_data);
|
||||
obj_op.call("cas", "cas_write_or_get", in);
|
||||
fp_oid = sha1r.to_str();
|
||||
break;
|
||||
}
|
||||
case pg_pool_t::TYPE_FINGERPRINT_SHA256:
  {
    // Derive the fingerprint object name from the SHA-256 digest of the chunk.
    sha256_digest_t sha256r = chunk_data.sha256();
    fp_oid = sha256r.to_str();
    // Stop here: without this break control falls through into the SHA512
    // case (overwriting fp_oid) and then into the default assert.
    break;
  }
case pg_pool_t::TYPE_FINGERPRINT_SHA512:
  {
    // Derive the fingerprint object name from the SHA-512 digest of the chunk.
    sha512_digest_t sha512r = chunk_data.sha512();
    fp_oid = sha512r.to_str();
    // Stop here: without this break control falls through into the default
    // "unrecognized fingerprint type" assert.
    break;
  }
|
||||
default:
|
||||
assert(0 == "unrecognized fingerprint type");
|
||||
break;
|
||||
}
|
||||
if (fp_oid != tgt_soid.oid) {
|
||||
// decrement old chunk's reference count
|
||||
ObjectOperation dec_op;
|
||||
cls_chunk_refcount_put_op put_call;
|
||||
::encode(put_call, in);
|
||||
dec_op.call("refcount", "chunk_put", in);
|
||||
// we don't care dec_op's completion. scrub for dedup will fix this.
|
||||
tid = osd->objecter->mutate(
|
||||
tgt_soid.oid, oloc, dec_op, snapc,
|
||||
ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
|
||||
flags, NULL);
|
||||
in.clear();
|
||||
}
|
||||
tgt_soid.oid = fp_oid;
|
||||
iter->second.oid = tgt_soid;
|
||||
// add data op
|
||||
ceph_osd_op osd_op;
|
||||
osd_op.extent.offset = 0;
|
||||
osd_op.extent.length = chunk_data.length();
|
||||
encode(osd_op, in);
|
||||
encode(soid, in);
|
||||
in.append(chunk_data);
|
||||
obj_op.call("cas", "cas_write_or_get", in);
|
||||
} else {
|
||||
obj_op.add_data(CEPH_OSD_OP_WRITE, tgt_offset, tgt_length, chunk_data);
|
||||
}
|
||||
|
@ -1455,12 +1455,18 @@ public:
|
||||
// Fingerprint (content hash) algorithms selectable for a pool.
typedef enum {
  TYPE_FINGERPRINT_NONE = 0,
  TYPE_FINGERPRINT_SHA1 = 1,
  TYPE_FINGERPRINT_SHA256 = 2,
  TYPE_FINGERPRINT_SHA512 = 3,
} fingerprint_t;

/**
 * Translate a fingerprint algorithm name into its fingerprint_t value.
 *
 * Recognized names are "none", "sha1", "sha256" and "sha512"; the match is
 * exact (case-sensitive).
 *
 * @param s algorithm name to look up
 * @return the matching enumerator, or (fingerprint_t)-1 when s is not one of
 *         the recognized names
 */
static fingerprint_t get_fingerprint_from_str(const std::string& s) {
  static const struct {
    const char *name;
    fingerprint_t type;
  } known_algorithms[] = {
    { "none",   TYPE_FINGERPRINT_NONE },
    { "sha1",   TYPE_FINGERPRINT_SHA1 },
    { "sha256", TYPE_FINGERPRINT_SHA256 },
    { "sha512", TYPE_FINGERPRINT_SHA512 },
  };

  for (const auto& candidate : known_algorithms) {
    if (s == candidate.name) {
      return candidate.type;
    }
  }
  // Sentinel for "unknown name"; kept as -1 so existing callers still match.
  return static_cast<fingerprint_t>(-1);
}
|
||||
const fingerprint_t get_fingerprint_type() const {
|
||||
@ -1479,6 +1485,8 @@ public:
|
||||
switch (m) {
|
||||
case TYPE_FINGERPRINT_NONE: return "none";
|
||||
case TYPE_FINGERPRINT_SHA1: return "sha1";
|
||||
case TYPE_FINGERPRINT_SHA256: return "sha256";
|
||||
case TYPE_FINGERPRINT_SHA512: return "sha512";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ void usage()
|
||||
cout << " --object <object_name> " << std::endl;
|
||||
cout << " --chunk-size <size> chunk-size (byte) " << std::endl;
|
||||
cout << " --chunk-algorithm <fixed|rabin> " << std::endl;
|
||||
cout << " --fingerprint-algorithm <sha1> " << std::endl;
|
||||
cout << " --fingerprint-algorithm <sha1|sha256|sha512> " << std::endl;
|
||||
cout << " --chunk-pool <pool name> " << std::endl;
|
||||
cout << " --max-thread <threads> " << std::endl;
|
||||
cout << " --report-perioid <seconds> " << std::endl;
|
||||
@ -371,6 +371,12 @@ void EstimateDedupRatio::add_chunk_fp_to_stat(bufferlist &chunk)
|
||||
if (fp_algo == "sha1") {
|
||||
sha1_digest_t sha1_val = chunk.sha1();
|
||||
fp = sha1_val.to_str();
|
||||
} else if (fp_algo == "sha256") {
|
||||
sha256_digest_t sha256_val = chunk.sha256();
|
||||
fp = sha256_val.to_str();
|
||||
} else if (fp_algo == "sha512") {
|
||||
sha512_digest_t sha512_val = chunk.sha512();
|
||||
fp = sha512_val.to_str();
|
||||
} else if (chunk_algo == "rabin") {
|
||||
uint64_t hash = rabin.gen_rabin_hash(chunk.c_str(), 0, chunk.length());
|
||||
fp = to_string(hash);
|
||||
@ -569,7 +575,8 @@ int estimate_dedup_ratio(const std::map < std::string, std::string > &opts,
|
||||
i = opts.find("fingerprint-algorithm");
|
||||
if (i != opts.end()) {
|
||||
fp_algo = i->second.c_str();
|
||||
if (fp_algo != "sha1" && fp_algo != "rabin") {
|
||||
if (fp_algo != "sha1" && fp_algo != "rabin"
|
||||
&& fp_algo != "sha256" && fp_algo != "sha512") {
|
||||
usage_exit();
|
||||
}
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user