From 3e066d9d32abdcaf500c1fa90ef64735e3390f95 Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Thu, 22 Jun 2023 04:42:55 +0000 Subject: [PATCH 1/5] tools/ceph_dedup_tool: limit memory used for fingerprint database Adds an FpMap capable of discarding any entries which have not met the dedup threshold. Signed-off-by: Myoungwon Oh Signed-off-by: Samuel Just --- src/tools/ceph_dedup_tool.cc | 182 ++++++++++++++++++++++++++++++----- 1 file changed, 160 insertions(+), 22 deletions(-) diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc index b57c74cf45d..0cb90699925 100644 --- a/src/tools/ceph_dedup_tool.cc +++ b/src/tools/ceph_dedup_tool.cc @@ -185,6 +185,7 @@ po::options_description make_usage() { ("daemon", ": execute sample dedup in daemon mode") ("loop", ": execute sample dedup in a loop until terminated. Sleeps 'wakeup-period' seconds between iterations") ("wakeup-period", po::value(), ": set the wakeup period of crawler thread (sec)") + ("fpstore-threshold", po::value()->default_value(100_M), ": set max size of in-memory fingerprint store (bytes)") ; desc.add(op_desc); return desc; @@ -566,10 +567,135 @@ public: bufferlist data; }; + using dup_count_t = ssize_t; + + template + class FpMap { + using map_t = std::unordered_map; + public: + /// Represents a nullable reference into logical container + class entry_t { + /// Entry may be into one of two maps or NONE, indicates which + enum entry_into_t { + UNDER, OVER, NONE + } entry_into = NONE; + + /// Valid iterator into map for UNDER|OVER, default for NONE + map_t::iterator iter; + + entry_t(entry_into_t entry_into, map_t::iterator iter) : + entry_into(entry_into), iter(iter) { + ceph_assert(entry_into != NONE); + } + + public: + entry_t() = default; + + auto &operator*() { + ceph_assert(entry_into != NONE); + return *iter; + } + auto operator->() { + ceph_assert(entry_into != NONE); + return iter.operator->(); + } + bool is_valid() const { + return entry_into != NONE; + } + bool 
is_above_threshold() const { + return entry_into == entry_t::OVER; + } + friend class FpMap; + }; + + /// inserts str, count into container, must not already be present + entry_t insert(const K &str, V count) { + std::pair r; + typename entry_t::entry_into_t s; + if (count < dedup_threshold) { + r = under_threshold_fp_map.insert({str, count}); + s = entry_t::UNDER; + } else { + r = over_threshold_fp_map.insert({str, count}); + s = entry_t::OVER; + } + ceph_assert(r.second); + return entry_t{s, r.first}; + } + + /// increments refcount for entry, promotes as necessary, entry must be valid + entry_t increment_reference(entry_t entry) { + ceph_assert(entry.is_valid()); + entry.iter->second++; + if (entry.entry_into == entry_t::OVER || + entry.iter->second < dedup_threshold) { + return entry; + } else { + auto [over_iter, inserted] = over_threshold_fp_map.insert( + *entry); + ceph_assert(inserted); + under_threshold_fp_map.erase(entry.iter); + return entry_t{entry_t::OVER, over_iter}; + } + } + + /// returns entry for fp, return will be !is_valid() if not present + auto find(const K &fp) { + if (auto iter = under_threshold_fp_map.find(fp); + iter != under_threshold_fp_map.end()) { + return entry_t{entry_t::UNDER, iter}; + } else if (auto iter = over_threshold_fp_map.find(fp); + iter != over_threshold_fp_map.end()) { + return entry_t{entry_t::OVER, iter}; + } else { + return entry_t{}; + } + } + + /// true if container contains fp + bool contains(const K &fp) { + return find(fp).is_valid(); + } + + /// returns number of items + size_t get_num_items() const { + return under_threshold_fp_map.size() + over_threshold_fp_map.size(); + } + + /// returns estimate of total in-memory size (bytes) + size_t estimate_total_size() const { + size_t total = 0; + if (!under_threshold_fp_map.empty()) { + total += under_threshold_fp_map.size() * + (under_threshold_fp_map.begin()->first.size() + sizeof(V)); + } + if (!over_threshold_fp_map.empty()) { + total += 
over_threshold_fp_map.size() * + (over_threshold_fp_map.begin()->first.size() + sizeof(V)); + } + return total; + } + + /// true if empty + bool empty() const { + return under_threshold_fp_map.empty() && over_threshold_fp_map.empty(); + } + + /// instructs container to drop entries with refcounts below threshold + void drop_entries_below_threshold() { + under_threshold_fp_map.clear(); + } + + FpMap(ssize_t dedup_threshold) : dedup_threshold(dedup_threshold) {} + FpMap() = delete; + private: + map_t under_threshold_fp_map; + map_t over_threshold_fp_map; + const ssize_t dedup_threshold; + }; + class FpStore { public: - using dup_count_t = ssize_t; - void maybe_print_status() { utime_t now = ceph_clock_now(); if (next_report != utime_t() && now > next_report) { @@ -581,41 +707,50 @@ public: } } - bool find(string& fp) { + bool contains(string& fp) { std::shared_lock lock(fingerprint_lock); - auto found_item = fp_map.find(fp); - return found_item != fp_map.end(); + return fp_map.contains(fp); } // return true if the chunk is duplicate bool add(chunk_t& chunk) { std::unique_lock lock(fingerprint_lock); - auto found_iter = fp_map.find(chunk.fingerprint); - ssize_t cur_reference = 1; + auto entry = fp_map.find(chunk.fingerprint); total_bytes += chunk.size; - maybe_print_status(); - if (found_iter == fp_map.end()) { - fp_map.insert({chunk.fingerprint, 1}); + if (!entry.is_valid()) { + if (is_fpmap_full()) { + fp_map.drop_entries_below_threshold(); + if (is_fpmap_full()) { + return false; + } + } + entry = fp_map.insert(chunk.fingerprint, 1); } else { - cur_reference = ++found_iter->second; + entry = fp_map.increment_reference(entry); } - return cur_reference >= dedup_threshold && dedup_threshold != -1; + return entry.is_above_threshold(); } - FpStore(size_t chunk_threshold, uint32_t report_period) : - dedup_threshold(chunk_threshold), report_period(report_period) { - next_report = start; - next_report += report_period; + bool is_fpmap_full() const { + return 
fp_map.estimate_total_size() >= memory_threshold; } + FpStore(size_t chunk_threshold, + uint32_t report_period, + ssize_t memory_threshold) : + report_period(report_period), + memory_threshold(memory_threshold), + fp_map(chunk_threshold) { } + FpStore() = delete; + private: - ssize_t dedup_threshold = -1; - std::unordered_map fp_map; std::shared_mutex fingerprint_lock; const utime_t start = ceph_clock_now(); utime_t next_report; const uint32_t report_period; size_t total_bytes = 0; + const uint64_t memory_threshold; + FpMap fp_map; }; struct SampleDedupGlobal { @@ -624,8 +759,9 @@ public: SampleDedupGlobal( int chunk_threshold, int sampling_ratio, - uint32_t report_period) : - fp_store(chunk_threshold, report_period), + uint32_t report_period, + unsigned fpstore_threshold) : + fp_store(chunk_threshold, report_period, fpstore_threshold), sampling_ratio(static_cast(sampling_ratio) / 100) { } }; @@ -836,7 +972,7 @@ void SampleDedupWorkerThread::try_dedup_and_accumulate_result( .data = chunk_data }; - if (sample_dedup_global.fp_store.find(fingerprint)) { + if (sample_dedup_global.fp_store.contains(fingerprint)) { duplicated_size += chunk_data.length(); } if (sample_dedup_global.fp_store.add(chunk_info)) { @@ -1643,6 +1779,8 @@ int make_crawling_daemon(const po::variables_map &opts) cout << "100 second is set as wakeup period by default" << std::endl; } + const unsigned fp_threshold = opts["fpstore-threshold"].as(); + std::string fp_algo = get_opts_fp_algo(opts); list pool_names; @@ -1714,7 +1852,7 @@ int make_crawling_daemon(const po::variables_map &opts) } SampleDedupWorkerThread::SampleDedupGlobal sample_dedup_global( - chunk_dedup_threshold, sampling_ratio, report_period); + chunk_dedup_threshold, sampling_ratio, report_period, fp_threshold); std::list threads; size_t total_size = 0; From c841c761ce6d3ef86a4390b4e6367f72bb323fb2 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 6 Oct 2023 15:54:28 -0700 Subject: [PATCH 2/5] tools/ceph_dedup_tool: use size_t for 
fpstore-threshold At various points, this value was an int, an unsigned, an ssize_t, or a uint64_t. Change all of these to size_t. Signed-off-by: Samuel Just --- src/tools/ceph_dedup_tool.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc index 0cb90699925..65c2b9ac3c6 100644 --- a/src/tools/ceph_dedup_tool.cc +++ b/src/tools/ceph_dedup_tool.cc @@ -185,7 +185,7 @@ po::options_description make_usage() { ("daemon", ": execute sample dedup in daemon mode") ("loop", ": execute sample dedup in a loop until terminated. Sleeps 'wakeup-period' seconds between iterations") ("wakeup-period", po::value(), ": set the wakeup period of crawler thread (sec)") - ("fpstore-threshold", po::value()->default_value(100_M), ": set max size of in-memory fingerprint store (bytes)") + ("fpstore-threshold", po::value()->default_value(100_M), ": set max size of in-memory fingerprint store (bytes)") ; desc.add(op_desc); return desc; @@ -737,7 +737,7 @@ public: FpStore(size_t chunk_threshold, uint32_t report_period, - ssize_t memory_threshold) : + size_t memory_threshold) : report_period(report_period), memory_threshold(memory_threshold), fp_map(chunk_threshold) { } @@ -749,7 +749,7 @@ public: utime_t next_report; const uint32_t report_period; size_t total_bytes = 0; - const uint64_t memory_threshold; + const size_t memory_threshold; FpMap fp_map; }; @@ -760,7 +760,7 @@ public: int chunk_threshold, int sampling_ratio, uint32_t report_period, - unsigned fpstore_threshold) : + size_t fpstore_threshold) : fp_store(chunk_threshold, report_period, fpstore_threshold), sampling_ratio(static_cast(sampling_ratio) / 100) { } }; @@ -1779,7 +1779,7 @@ int make_crawling_daemon(const po::variables_map &opts) cout << "100 second is set as wakeup period by default" << std::endl; } - const unsigned fp_threshold = opts["fpstore-threshold"].as(); + const size_t fp_threshold = opts["fpstore-threshold"].as(); std::string 
fp_algo = get_opts_fp_algo(opts); From 1e4eb389ac22201b1614437ecbcb3f70bfe4d3ef Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 6 Oct 2023 15:58:01 -0700 Subject: [PATCH 3/5] tools/ceph_dedup_tool: use size_t for dup_count_t Duplicate counts cannot be negative. Signed-off-by: Samuel Just --- src/tools/ceph_dedup_tool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc index 65c2b9ac3c6..4808058b866 100644 --- a/src/tools/ceph_dedup_tool.cc +++ b/src/tools/ceph_dedup_tool.cc @@ -567,7 +567,7 @@ public: bufferlist data; }; - using dup_count_t = ssize_t; + using dup_count_t = size_t; template class FpMap { From 08d71e82a34e7ab4eb79b1c81b331e03d5437155 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 6 Oct 2023 15:59:44 -0700 Subject: [PATCH 4/5] tools/ceph_dedup_tool: make dedup_threshold a size_t It was variously an int, a uint32_t, or an ssize_t. Standardize it to size_t. Signed-off-by: Samuel Just --- src/tools/ceph_dedup_tool.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc index 4808058b866..c67368f9318 100644 --- a/src/tools/ceph_dedup_tool.cc +++ b/src/tools/ceph_dedup_tool.cc @@ -180,7 +180,7 @@ po::options_description make_usage() { ("snap", ": deduplciate snapshotted object") ("debug", ": enable debug") ("pgid", ": set pgid") - ("chunk-dedup-threshold", po::value(), ": set the threshold for chunk dedup (number of duplication) ") + ("chunk-dedup-threshold", po::value(), ": set the threshold for chunk dedup (number of duplication) ") ("sampling-ratio", po::value(), ": set the sampling ratio (percentile)") ("daemon", ": execute sample dedup in daemon mode") ("loop", ": execute sample dedup in a loop until terminated. 
Sleeps 'wakeup-period' seconds between iterations") @@ -686,12 +686,12 @@ public: under_threshold_fp_map.clear(); } - FpMap(ssize_t dedup_threshold) : dedup_threshold(dedup_threshold) {} + FpMap(size_t dedup_threshold) : dedup_threshold(dedup_threshold) {} FpMap() = delete; private: map_t under_threshold_fp_map; map_t over_threshold_fp_map; - const ssize_t dedup_threshold; + const size_t dedup_threshold; }; class FpStore { @@ -757,7 +757,7 @@ public: FpStore fp_store; const double sampling_ratio = -1; SampleDedupGlobal( - int chunk_threshold, + size_t chunk_threshold, int sampling_ratio, uint32_t report_period, size_t fpstore_threshold) : @@ -1756,7 +1756,7 @@ int make_crawling_daemon(const po::variables_map &opts) uint32_t chunk_dedup_threshold = -1; if (opts.count("chunk-dedup-threshold")) { - chunk_dedup_threshold = opts["chunk-dedup-threshold"].as(); + chunk_dedup_threshold = opts["chunk-dedup-threshold"].as(); } std::string chunk_algo = get_opts_chunk_algo(opts); From c4275c9842cbd5f80b87b84a34bcb56d480129fc Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Fri, 3 Nov 2023 03:18:32 +0000 Subject: [PATCH 5/5] tools/ceph_dedup_tool: add a test to check the memory limit works correctly Signed-off-by: Myoungwon Oh --- qa/workunits/rados/test_dedup_tool.sh | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/qa/workunits/rados/test_dedup_tool.sh b/qa/workunits/rados/test_dedup_tool.sh index 352e9ef09c1..381094c57c2 100755 --- a/qa/workunits/rados/test_dedup_tool.sh +++ b/qa/workunits/rados/test_dedup_tool.sh @@ -515,6 +515,76 @@ function test_sample_dedup_snap() $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it } +function test_dedup_memory_limit() +{ + CHUNK_POOL=dedup_chunk_pool + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + sleep 2 + + run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 + 
run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + # 6 dedupable objects + CONTENT_1="There hiHI" + echo $CONTENT_1 > foo + for num in `seq 1 6` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 3 Unique objects + for num in `seq 7 9` + do + CONTENT_="There hiHI"$num + echo $CONTENT_ > foo + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 6 dedupable objects + CONTENT_2="There hiHIhi" + echo $CONTENT_2 > foo + for num in `seq 10 15` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # Since the memory limit is 100 bytes, adding the 3 unique objects triggers a memory drop that leaves + # only the chunk of the first 6 dedupable objects. When the next 6 dedupable objects are then processed, + # the crawler should still find dedupable chunks because the earlier memory drop freed space for them. + # 1 entry == 46 bytes + + sleep 2 + + # Execute dedup crawler + RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --chunk-dedup-threshold 2 --sampling-ratio 100 --fpstore-threshold 100) + + CHUNK_OID_1=$(echo $CONTENT_1 | sha1sum | awk '{print $1}') + CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_1 | grep foo) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_2 | grep foo) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "There is no expected chunk object" + fi + + rm -rf ./foo + for num in `seq 1 15` + do + $RADOS_TOOL -p $POOL rm foo_$num + done + + $CEPH_TOOL osd pool delete
$CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it +} test_dedup_ratio_fixed test_dedup_chunk_scrub @@ -522,6 +592,7 @@ test_dedup_chunk_repair test_dedup_object test_sample_dedup test_sample_dedup_snap +test_dedup_memory_limit $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it