mirror of
https://github.com/ceph/ceph
synced 2025-01-02 17:12:31 +00:00
common: introduce BoundedKeyCounter and unit test
Signed-off-by: Casey Bodley <cbodley@redhat.com>
This commit is contained in:
parent
4309adb36b
commit
e9a5ec9f64
187
src/common/bounded_key_counter.h
Normal file
187
src/common/bounded_key_counter.h
Normal file
@ -0,0 +1,187 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2017 Red Hat, Inc
|
||||
*
|
||||
* Author: Casey Bodley <cbodley@redhat.com>
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef BOUNDED_KEY_COUNTER_H
|
||||
#define BOUNDED_KEY_COUNTER_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "include/assert.h"
|
||||
|
||||
/**
|
||||
* BoundedKeyCounter
|
||||
*
|
||||
* A data structure that counts the number of times a given key is inserted,
|
||||
* and can return the keys with the highest counters. The number of unique keys
|
||||
* is bounded by the given constructor argument, meaning that new keys will be
|
||||
* rejected if they would exceed this bound.
|
||||
*
|
||||
* It is optimized for use where insertion is frequent, but sorted listings are
|
||||
* both infrequent and tend to request a small subset of the available keys.
|
||||
*/
|
||||
template <typename Key, typename Count>
class BoundedKeyCounter {
  /// map type to associate keys with their counter values
  using map_type = std::map<Key, Count>;
  using value_type = typename map_type::value_type;

  /// view type used for sorting key-value pairs by their counter value
  using view_type = std::vector<const value_type*>;

  /// maximum number of counters to store at once
  const size_t bound;

  /// map of counters, with a maximum size given by 'bound'
  map_type counters;

  /// storage for sorted key-value pairs. the view is either complete (one
  /// pointer per entry in 'counters') or empty because it was invalidated;
  /// get_highest() rebuilds it whenever the two sizes disagree
  view_type sorted;

  /// number of leading entries in 'sorted' that are known to be in descending
  /// order. kept as an index rather than an iterator, because clear(),
  /// push_back() and refilling the vector would all invalidate an iterator
  size_t num_sorted = 0;

  /// iterator one past the last entry that is known to be sorted
  typename view_type::iterator sorted_end()
  {
    using difference_type = typename view_type::difference_type;
    return sorted.begin() + static_cast<difference_type>(num_sorted);
  }

  /// invalidate view of sorted entries
  void invalidate_sorted()
  {
    sorted.clear();
    num_sorted = 0;
  }

  /// refill the view with a pointer to every key-value pair, all unsorted
  void rebuild_sorted()
  {
    sorted.clear();
    sorted.reserve(counters.size());
    for (const auto& entry : counters) {
      sorted.push_back(&entry);
    }
    num_sorted = 0;
  }

  /// value_type comparison function for sorting in descending order
  static bool value_greater(const value_type *lhs, const value_type *rhs)
  {
    return lhs->second > rhs->second;
  }

 protected:
  /// return the number of sorted entries. marked protected for unit testing
  size_t get_num_sorted() const
  {
    return num_sorted;
  }

 public:
  /// construct a counter that tracks at most 'bound' unique keys
  BoundedKeyCounter(size_t bound)
    : bound(bound)
  {
    sorted.reserve(bound);
  }

  /// return the number of keys stored
  size_t size() const noexcept { return counters.size(); }

  /// return the maximum number of keys
  size_t capacity() const noexcept { return bound; }

  /// increment a counter for the given key and return its value. if the key was
  /// not present, insert it. if the map is full, return 0
  Count insert(const Key& key, Count n = 1)
  {
    typename map_type::iterator i;

    if (counters.size() < bound) {
      // insert new entries at count=0
      bool inserted;
      std::tie(i, inserted) = counters.emplace(key, 0);
      // only extend the view while it is complete. after erase() the view is
      // empty but 'counters' may not be; appending here would leave a partial
      // view that get_highest() would mistake for a complete one
      if (inserted && sorted.size() + 1 == counters.size()) {
        sorted.push_back(&*i);
      }
    } else {
      // when full, refuse to insert new entries
      i = counters.find(key);
      if (i == counters.end()) {
        return 0;
      }
    }

    i->second += n; // add to the counter

    // update sorted position if necessary. use a binary search for the first
    // element in the sorted range that's not greater than this counter;
    // everything from there on can no longer be trusted to be in order.
    // NOTE(review): this relies on the counter not decreasing (n >= 0)
    const auto first = sorted.begin();
    const auto pos = std::lower_bound(first, sorted_end(), &*i, &value_greater);
    num_sorted = static_cast<size_t>(pos - first);

    return i->second;
  }

  /// remove the given key from the map of counters
  void erase(const Key& key)
  {
    auto i = counters.find(key);
    if (i == counters.end()) {
      return;
    }
    // removing the sorted entry would require linear search; invalidate instead
    invalidate_sorted();

    counters.erase(i);
  }

  /// query the highest N key-value pairs sorted by counter value, passing each
  /// in order to the given callback with arguments (Key, Count)
  template <typename Callback>
  void get_highest(size_t count, Callback&& cb)
  {
    if (sorted.size() != counters.size()) {
      // the view was invalidated; start over with the entire range unsorted
      rebuild_sorted();
    }

    if (num_sorted < count) {
      // extend the sorted range to cover the requested number of entries
      num_sorted = std::min(count, sorted.size());

      // sort all entries in descending order up to the given position
      std::partial_sort(sorted.begin(), sorted_end(), sorted.end(),
                        &value_greater);
    }

    // return the requested range via callback
    const size_t limit = std::min(count, sorted.size());
    for (size_t idx = 0; idx < limit; ++idx) {
      const value_type *pair = sorted[idx];
      cb(pair->first, pair->second);
    }
  }

  /// remove all keys and counters and invalidate the sorted range
  void clear()
  {
    invalidate_sorted();
    counters.clear();
  }
};
|
||||
|
||||
#endif // BOUNDED_KEY_COUNTER_H
|
@ -271,3 +271,9 @@ add_ceph_unittest(unittest_backport14)
|
||||
add_executable(unittest_convenience test_convenience.cc)
|
||||
target_link_libraries(unittest_convenience ceph-common)
|
||||
add_ceph_unittest(unittest_convenience)
|
||||
|
||||
add_executable(unittest_bounded_key_counter
|
||||
test_bounded_key_counter.cc
|
||||
$<TARGET_OBJECTS:unit-main>)
|
||||
target_link_libraries(unittest_bounded_key_counter global)
|
||||
add_ceph_unittest(unittest_bounded_key_counter)
|
||||
|
200
src/test/common/test_bounded_key_counter.cc
Normal file
200
src/test/common/test_bounded_key_counter.cc
Normal file
@ -0,0 +1,200 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2015 Red Hat
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*/
|
||||
#include "common/bounded_key_counter.h"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace {
|
||||
|
||||
// call get_highest() and return the number of callbacks
|
||||
template <typename Key, typename Count>
|
||||
size_t count_highest(BoundedKeyCounter<Key, Count>& counter, size_t count)
|
||||
{
|
||||
size_t callbacks = 0;
|
||||
counter.get_highest(count, [&callbacks] (const Key& key, Count count) {
|
||||
++callbacks;
|
||||
});
|
||||
return callbacks;
|
||||
}
|
||||
|
||||
// call get_highest() and return the key/value pairs as a vector
|
||||
template <typename Key, typename Count,
|
||||
typename Vector = std::vector<std::pair<Key, Count>>>
|
||||
Vector get_highest(BoundedKeyCounter<Key, Count>& counter, size_t count)
|
||||
{
|
||||
Vector results;
|
||||
counter.get_highest(count, [&results] (const Key& key, Count count) {
|
||||
results.emplace_back(key, count);
|
||||
});
|
||||
return results;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// insert() returns the updated count, or 0 once the bound rejects a new key
TEST(BoundedKeyCounter, Insert)
{
  BoundedKeyCounter<int, int> kc(2); // room for two distinct keys
  EXPECT_EQ(1, kc.insert(0)); // key 0 is new, default increment of 1
  EXPECT_EQ(2, kc.insert(0)); // same key again bumps the count
  EXPECT_EQ(7, kc.insert(0, 5)); // explicit increment of 5
  EXPECT_EQ(1, kc.insert(1)); // second distinct key fills the map
  EXPECT_EQ(0, kc.insert(2)); // third distinct key is refused
}
|
||||
|
||||
// erase() tolerates missing keys and leaves nothing behind for get_highest()
TEST(BoundedKeyCounter, Erase)
{
  BoundedKeyCounter<int, int> kc(10);

  kc.erase(0); // nothing stored yet, must be a no-op
  EXPECT_EQ(1, kc.insert(1, 1));
  EXPECT_EQ(2, kc.insert(2, 2));
  EXPECT_EQ(3, kc.insert(3, 3));
  kc.erase(2);
  kc.erase(1);
  kc.erase(3);
  kc.erase(3); // second erase of the same key is harmless
  EXPECT_EQ(0u, count_highest(kc, 10));
}
|
||||
|
||||
// size() follows successful inserts and erases, and nothing else
TEST(BoundedKeyCounter, Size)
{
  BoundedKeyCounter<int, int> kc(4);
  EXPECT_EQ(0u, kc.size());

  // fill up to the bound, one key at a time
  EXPECT_EQ(1, kc.insert(1, 1));
  EXPECT_EQ(1u, kc.size());
  EXPECT_EQ(2, kc.insert(2, 2));
  EXPECT_EQ(2u, kc.size());
  EXPECT_EQ(3, kc.insert(3, 3));
  EXPECT_EQ(3u, kc.size());
  EXPECT_EQ(4, kc.insert(4, 4));
  EXPECT_EQ(4u, kc.size());

  EXPECT_EQ(0, kc.insert(5, 5)); // over the bound: key refused
  EXPECT_EQ(4u, kc.size()); // so the size stays put
  EXPECT_EQ(5, kc.insert(4, 1)); // existing key is still updated
  EXPECT_EQ(4u, kc.size()); // without growing the map

  // drain the map again
  kc.erase(2);
  EXPECT_EQ(3u, kc.size());
  kc.erase(2); // already gone
  EXPECT_EQ(3u, kc.size()); // so the size stays put
  kc.erase(4);
  EXPECT_EQ(2u, kc.size());
  kc.erase(1);
  EXPECT_EQ(1u, kc.size());
  kc.erase(3);
  EXPECT_EQ(0u, kc.size());

  // clear() empties a non-empty map
  EXPECT_EQ(6, kc.insert(6, 6));
  EXPECT_EQ(1u, kc.size());
  kc.clear();
  EXPECT_EQ(0u, kc.size());
}
|
||||
|
||||
// get_highest() yields entries in descending count order, capped by 'count'
TEST(BoundedKeyCounter, GetHighest)
{
  using Vector = std::vector<std::pair<int, int>>;
  BoundedKeyCounter<int, int> kc(10);

  // an empty counter never calls back, whatever is requested
  EXPECT_EQ(0u, count_highest(kc, 0)); // zero entries requested
  EXPECT_EQ(0u, count_highest(kc, 10)); // request equal to the bound
  EXPECT_EQ(0u, count_highest(kc, 999)); // request far above the bound

  EXPECT_EQ(1, kc.insert(1, 1));
  EXPECT_EQ(Vector({{1,1}}), get_highest(kc, 10));
  EXPECT_EQ(2, kc.insert(2, 2));
  EXPECT_EQ(Vector({{2,2},{1,1}}), get_highest(kc, 10));
  EXPECT_EQ(3, kc.insert(3, 3));
  EXPECT_EQ(Vector({{3,3},{2,2},{1,1}}), get_highest(kc, 10));
  EXPECT_EQ(3, kc.insert(4, 3)); // key 4 ties with key 3 at count=3
  // ties have no defined relative order, so only the entry count is checked
  EXPECT_EQ(4u, count_highest(kc, 10));
  kc.erase(3);
  EXPECT_EQ(Vector({{4,3},{2,2},{1,1}}), get_highest(kc, 10));
  EXPECT_EQ(0u, count_highest(kc, 0)); // zero requested, zero returned
}
|
||||
|
||||
// clear() drops every key and frees the capacity for new ones
TEST(BoundedKeyCounter, Clear)
{
  BoundedKeyCounter<int, int> kc(2);
  EXPECT_EQ(1, kc.insert(0)); // first key
  EXPECT_EQ(1, kc.insert(1)); // second key, map now full
  EXPECT_EQ(2u, count_highest(kc, 2)); // both are listed

  kc.clear();

  EXPECT_EQ(0u, count_highest(kc, 2)); // nothing left to list
  EXPECT_EQ(1, kc.insert(1)); // counts restart from zero
  EXPECT_EQ(1, kc.insert(2)); // a previously unseen key fits again
  EXPECT_EQ(2u, count_highest(kc, 2)); // both are listed
}
|
||||
|
||||
// tests for partial sort and invalidation
|
||||
// tracks how much of the sorted view each operation keeps or discards
TEST(BoundedKeyCounter, GetNumSorted)
{
  // widen access to the protected get_num_sorted() so it can be asserted on
  struct ExposedCounter : public BoundedKeyCounter<int, int> {
    using BoundedKeyCounter<int, int>::BoundedKeyCounter;
    using BoundedKeyCounter<int, int>::get_num_sorted;
  };

  ExposedCounter kc(10);

  // nothing to sort while empty
  EXPECT_EQ(0u, kc.get_num_sorted());
  EXPECT_EQ(0u, count_highest(kc, 10));
  EXPECT_EQ(0u, kc.get_num_sorted());

  // inserts alone never sort anything
  EXPECT_EQ(2, kc.insert(2, 2));
  EXPECT_EQ(3, kc.insert(3, 3));
  EXPECT_EQ(4, kc.insert(4, 4));
  EXPECT_EQ(0u, kc.get_num_sorted());

  // each query sorts only as many entries as it asks for
  EXPECT_EQ(0u, count_highest(kc, 0));
  EXPECT_EQ(0u, kc.get_num_sorted());
  EXPECT_EQ(1u, count_highest(kc, 1));
  EXPECT_EQ(1u, kc.get_num_sorted());
  EXPECT_EQ(2u, count_highest(kc, 2));
  EXPECT_EQ(2u, kc.get_num_sorted());
  EXPECT_EQ(3u, count_highest(kc, 10));
  EXPECT_EQ(3u, kc.get_num_sorted());

  EXPECT_EQ(1, kc.insert(1, 1)); // new lowest entry keeps the sorted prefix
  EXPECT_EQ(3u, kc.get_num_sorted());

  EXPECT_EQ(4u, count_highest(kc, 10));
  EXPECT_EQ(4u, kc.get_num_sorted());

  EXPECT_EQ(5, kc.insert(5, 5)); // new highest entry discards the whole prefix
  EXPECT_EQ(0u, kc.get_num_sorted());

  // the prefix grows back step by step with larger requests
  EXPECT_EQ(0u, count_highest(kc, 0));
  EXPECT_EQ(0u, kc.get_num_sorted());
  EXPECT_EQ(1u, count_highest(kc, 1));
  EXPECT_EQ(1u, kc.get_num_sorted());
  EXPECT_EQ(2u, count_highest(kc, 2));
  EXPECT_EQ(2u, kc.get_num_sorted());
  EXPECT_EQ(3u, count_highest(kc, 3));
  EXPECT_EQ(3u, kc.get_num_sorted());
  EXPECT_EQ(4u, count_highest(kc, 4));
  EXPECT_EQ(4u, kc.get_num_sorted());
  EXPECT_EQ(5u, count_highest(kc, 10));
  EXPECT_EQ(5u, kc.get_num_sorted());

  // bumping a stored key discards only the entries at or below its new count
  EXPECT_EQ(2, kc.insert(1)); // {1,2} and {2,2} fall out of the prefix
  EXPECT_EQ(3u, kc.get_num_sorted());

  EXPECT_EQ(5u, count_highest(kc, 10));
  EXPECT_EQ(5u, kc.get_num_sorted());

  kc.clear(); // clearing discards the prefix as well
  EXPECT_EQ(0u, kc.get_num_sorted());
}
|
||||
|
Loading…
Reference in New Issue
Block a user