common: introduce BoundedKeyCounter and unit test

Signed-off-by: Casey Bodley <cbodley@redhat.com>
This commit is contained in:
Casey Bodley 2017-08-24 10:01:36 -04:00
parent 4309adb36b
commit e9a5ec9f64
3 changed files with 393 additions and 0 deletions

View File

@ -0,0 +1,187 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2017 Red Hat, Inc
*
* Author: Casey Bodley <cbodley@redhat.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#ifndef BOUNDED_KEY_COUNTER_H
#define BOUNDED_KEY_COUNTER_H
#include <algorithm>
#include <cstddef>
#include <map>
#include <tuple>
#include <utility>
#include <vector>
#include "include/assert.h"
/**
 * BoundedKeyCounter
 *
 * A data structure that counts the number of times a given key is inserted,
 * and can return the keys with the highest counters. The number of unique keys
 * is bounded by the given constructor argument, meaning that new keys will be
 * rejected if they would exceed this bound.
 *
 * It is optimized for use where insertion is frequent, but sorted listings are
 * both infrequent and tend to request a small subset of the available keys.
 *
 * Sorted listings are served from a lazily built view: a vector of pointers to
 * the map's key-value pairs, of which the first 'num_sorted' entries are in
 * descending counter order. The view is always either empty or holds exactly
 * one pointer per stored key; whenever it falls out of step with the map it is
 * dropped and rebuilt by the next call to get_highest().
 */
template <typename Key, typename Count>
class BoundedKeyCounter {
  /// map type to associate keys with their counter values
  using map_type = std::map<Key, Count>;
  using value_type = typename map_type::value_type;
  /// view type used for sorting key-value pairs by their counter value
  using view_type = std::vector<const value_type*>;

  /// maximum number of counters to store at once
  const size_t bound;
  /// map of counters, with a maximum size given by 'bound'
  map_type counters;
  /// storage for sorted key-value pairs
  view_type sorted;
  /// number of leading entries of 'sorted' that are in descending order. this
  /// is an index rather than an iterator, because clear(), push_back() and
  /// assign() on the vector would invalidate a stored iterator
  size_t num_sorted = 0;

  /// invalidate view of sorted entries
  void invalidate_sorted()
  {
    sorted.clear();
    num_sorted = 0;
  }

  /// value_type comparison function for sorting in descending order
  static bool value_greater(const value_type *lhs, const value_type *rhs)
  {
    return lhs->second > rhs->second;
  }

 protected:
  /// return the number of sorted entries. marked protected for unit testing
  size_t get_num_sorted() const
  {
    return num_sorted;
  }

 public:
  /// construct an empty counter that stores at most 'bound' unique keys
  BoundedKeyCounter(size_t bound)
    : bound(bound)
  {
    sorted.reserve(bound);
  }

  /// copy the bound and the counters, but not the sorted view: its entries
  /// point into the source's map, so the copy rebuilds its own view on demand
  BoundedKeyCounter(const BoundedKeyCounter& other)
    : bound(other.bound),
      counters(other.counters)
  {
    sorted.reserve(bound);
  }

  /// take over the counters and the sorted view. pointers to map elements stay
  /// valid across a map move, so the view can be transferred as it is. the
  /// source is left without a view
  BoundedKeyCounter(BoundedKeyCounter&& other)
    : bound(other.bound),
      counters(std::move(other.counters)),
      sorted(std::move(other.sorted)),
      num_sorted(other.num_sorted)
  {
    other.sorted.clear();
    other.num_sorted = 0;
  }

  /// return the number of keys stored
  size_t size() const noexcept { return counters.size(); }

  /// return the maximum number of keys
  size_t capacity() const noexcept { return bound; }

  /// increment a counter for the given key and return its value. if the key was
  /// not present, insert it. if the map is full, return 0
  Count insert(const Key& key, Count n = 1)
  {
    typename map_type::iterator i;

    if (counters.size() < bound) {
      // the view may only be extended in place while it covers every stored
      // key. after an invalidation it is empty while counters remain, and
      // appending to it would produce a partial view that get_highest() could
      // not tell apart from a complete one
      const bool view_is_complete = (sorted.size() == counters.size());

      // insert new entries at count=0
      bool inserted;
      std::tie(i, inserted) = counters.emplace(key, 0);
      if (inserted && view_is_complete) {
        sorted.push_back(&*i);
      }
    } else {
      // when full, refuse to insert new entries
      i = counters.find(key);
      if (i == counters.end()) {
        return 0;
      }
    }

    const Count previous = i->second;
    i->second += n; // add to the counter

    if (previous > i->second) {
      // the counter went down (negative n or wraparound). the entry may now
      // belong further back in the sorted range, and finding it would take a
      // linear search, so drop the ordering and re-sort on demand
      num_sorted = 0;
    } else {
      // update sorted position if necessary. use a binary search for the last
      // element in the sorted range that's greater than this counter; entries
      // from there on may be out of order now
      const auto first = sorted.begin();
      const auto last = first + num_sorted;
      num_sorted = static_cast<size_t>(
          std::lower_bound(first, last, &*i, &value_greater) - first);
    }
    return i->second;
  }

  /// remove the given key from the map of counters
  void erase(const Key& key)
  {
    auto i = counters.find(key);
    if (i == counters.end()) {
      return;
    }
    // removing the sorted entry would require linear search; invalidate instead
    invalidate_sorted();

    counters.erase(i);
  }

  /// query the highest N key-value pairs sorted by counter value, passing each
  /// in order to the given callback with arguments (Key, Count). the relative
  /// order of entries with equal counters is unspecified
  template <typename Callback>
  void get_highest(size_t count, Callback&& cb)
  {
    if (sorted.size() != counters.size()) {
      // the view was invalidated; the entire range is unsorted
      assert(num_sorted == 0);
      // initialize the vector with pointers to all key-value pairs
      sorted.clear();
      for (const auto& entry : counters) {
        sorted.push_back(&entry);
      }
    }

    if (num_sorted < count) {
      // extend the sorted range to cover the requested number of entries
      num_sorted = std::min(count, sorted.size());
      // sort all entries in descending order up to the given position
      std::partial_sort(sorted.begin(), sorted.begin() + num_sorted,
                        sorted.end(), &value_greater);
    }

    // return the requested range via callback
    const size_t limit = std::min(count, sorted.size());
    for (size_t k = 0; k < limit; ++k) {
      const value_type& entry = *sorted[k];
      cb(entry.first, entry.second);
    }
  }

  /// remove all keys and counters and invalidate the sorted range
  void clear()
  {
    invalidate_sorted();
    counters.clear();
  }
};
#endif // BOUNDED_KEY_COUNTER_H

View File

@ -271,3 +271,9 @@ add_ceph_unittest(unittest_backport14)
# unittest_convenience: built from test_convenience.cc, linked against
# ceph-common and registered through add_ceph_unittest()
add_executable(unittest_convenience test_convenience.cc)
target_link_libraries(unittest_convenience ceph-common)
add_ceph_unittest(unittest_convenience)
# unittest_bounded_key_counter: built from test_bounded_key_counter.cc together
# with the object files of the unit-main target, linked against global and
# registered through add_ceph_unittest()
add_executable(unittest_bounded_key_counter
test_bounded_key_counter.cc
$<TARGET_OBJECTS:unit-main>)
target_link_libraries(unittest_bounded_key_counter global)
add_ceph_unittest(unittest_bounded_key_counter)

View File

@ -0,0 +1,200 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2015 Red Hat
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#include "common/bounded_key_counter.h"
#include <gtest/gtest.h>
namespace {
// call get_highest() for up to 'count' entries and return how many times the
// callback was invoked
template <typename Key, typename Count>
size_t count_highest(BoundedKeyCounter<Key, Count>& counter, size_t count)
{
  size_t callbacks = 0;
  // only the number of invocations matters, so the callback's arguments are
  // left unnamed; this also avoids shadowing the 'count' parameter above
  counter.get_highest(count, [&callbacks] (const Key&, Count) {
      ++callbacks;
    });
  return callbacks;
}
// run get_highest() for up to 'count' entries and collect the reported
// key/value pairs, in callback order, into a vector
template <typename Key, typename Count,
          typename Vector = std::vector<std::pair<Key, Count>>>
Vector get_highest(BoundedKeyCounter<Key, Count>& counter, size_t count)
{
  Vector collected;
  // append each reported pair to the output vector
  auto collect = [&collected] (const Key& k, Count value) {
    collected.emplace_back(k, value);
  };
  counter.get_highest(count, collect);
  return collected;
}
} // anonymous namespace
// insert() returns the updated counter value for a key that is accepted, and 0
// for a new key that would exceed the bound (2 keys in this test)
TEST(BoundedKeyCounter, Insert)
{
BoundedKeyCounter<int, int> counter(2);
EXPECT_EQ(1, counter.insert(0)); // insert new key
EXPECT_EQ(2, counter.insert(0)); // increment counter
EXPECT_EQ(7, counter.insert(0, 5)); // add 5 to counter
EXPECT_EQ(1, counter.insert(1)); // insert new key
EXPECT_EQ(0, counter.insert(2)); // reject new key
}
// erase() tolerates keys that are absent (never inserted or already erased),
// and once every key has been erased get_highest() reports nothing
TEST(BoundedKeyCounter, Erase)
{
BoundedKeyCounter<int, int> counter(10);
counter.erase(0); // ok to erase nonexistent key
EXPECT_EQ(1, counter.insert(1, 1));
EXPECT_EQ(2, counter.insert(2, 2));
EXPECT_EQ(3, counter.insert(3, 3));
// erase in an order different from insertion, then erase key 3 a second time
counter.erase(2);
counter.erase(1);
counter.erase(3);
counter.erase(3);
EXPECT_EQ(0u, count_highest(counter, 10));
}
// size() tracks the number of distinct keys: it grows on each new key, is
// unchanged by rejected inserts, counter updates and redundant erases, and
// drops to zero after clear()
TEST(BoundedKeyCounter, Size)
{
BoundedKeyCounter<int, int> counter(4);
EXPECT_EQ(0u, counter.size());
// fill the counter up to its bound of 4 keys
EXPECT_EQ(1, counter.insert(1, 1));
EXPECT_EQ(1u, counter.size());
EXPECT_EQ(2, counter.insert(2, 2));
EXPECT_EQ(2u, counter.size());
EXPECT_EQ(3, counter.insert(3, 3));
EXPECT_EQ(3u, counter.size());
EXPECT_EQ(4, counter.insert(4, 4));
EXPECT_EQ(4u, counter.size());
EXPECT_EQ(0, counter.insert(5, 5)); // reject new key
EXPECT_EQ(4u, counter.size()); // size unchanged
EXPECT_EQ(5, counter.insert(4, 1)); // update existing key
EXPECT_EQ(4u, counter.size()); // size unchanged
// erase the keys one by one; each successful erase shrinks the size by one
counter.erase(2);
EXPECT_EQ(3u, counter.size());
counter.erase(2); // erase duplicate
EXPECT_EQ(3u, counter.size()); // size unchanged
counter.erase(4);
EXPECT_EQ(2u, counter.size());
counter.erase(1);
EXPECT_EQ(1u, counter.size());
counter.erase(3);
EXPECT_EQ(0u, counter.size());
// an emptied counter accepts new keys again, and clear() removes them
EXPECT_EQ(6, counter.insert(6, 6));
EXPECT_EQ(1u, counter.size());
counter.clear();
EXPECT_EQ(0u, counter.size());
}
// get_highest() reports key/value pairs in descending counter order, never
// reports more entries than are stored, and reflects erased keys
TEST(BoundedKeyCounter, GetHighest)
{
BoundedKeyCounter<int, int> counter(10);
using Vector = std::vector<std::pair<int, int>>;
EXPECT_EQ(0u, count_highest(counter, 0)); // ok to request 0
EXPECT_EQ(0u, count_highest(counter, 10)); // empty
EXPECT_EQ(0u, count_highest(counter, 999)); // ok to request count >> 10
// each newly inserted key has the highest counter so far, so it is listed first
EXPECT_EQ(1, counter.insert(1, 1));
EXPECT_EQ(Vector({{1,1}}), get_highest(counter, 10));
EXPECT_EQ(2, counter.insert(2, 2));
EXPECT_EQ(Vector({{2,2},{1,1}}), get_highest(counter, 10));
EXPECT_EQ(3, counter.insert(3, 3));
EXPECT_EQ(Vector({{3,3},{2,2},{1,1}}), get_highest(counter, 10));
EXPECT_EQ(3, counter.insert(4, 3)); // insert duplicated count=3
// still returns 4 entries (but order of {3,3} and {4,3} is unspecified)
EXPECT_EQ(4u, count_highest(counter, 10));
// erasing key 3 removes the tie, so the full order is well-defined again
counter.erase(3);
EXPECT_EQ(Vector({{4,3},{2,2},{1,1}}), get_highest(counter, 10));
EXPECT_EQ(0u, count_highest(counter, 0)); // requesting 0 still returns 0
}
// clear() removes every key, after which get_highest() reports nothing and the
// counter can be filled up to its bound again, including with different keys
TEST(BoundedKeyCounter, Clear)
{
BoundedKeyCounter<int, int> counter(2);
EXPECT_EQ(1, counter.insert(0)); // insert new key
EXPECT_EQ(1, counter.insert(1)); // insert new key
EXPECT_EQ(2u, count_highest(counter, 2)); // return 2 entries
counter.clear();
EXPECT_EQ(0u, count_highest(counter, 2)); // return 0 entries
EXPECT_EQ(1, counter.insert(1)); // insert new key
EXPECT_EQ(1, counter.insert(2)); // insert new unique key
EXPECT_EQ(2u, count_highest(counter, 2)); // return 2 entries
}
// tests for partial sort and invalidation: checks how many entries the counter
// reports as sorted after each get_highest(), insert() and clear()
TEST(BoundedKeyCounter, GetNumSorted)
{
// subclass that makes the protected get_num_sorted() callable from the test
struct MockCounter : public BoundedKeyCounter<int, int> {
using BoundedKeyCounter<int, int>::BoundedKeyCounter;
// expose as public for testing sort invalidations
using BoundedKeyCounter<int, int>::get_num_sorted;
};
MockCounter counter(10);
// an empty counter has nothing to sort, even after a query
EXPECT_EQ(0u, counter.get_num_sorted());
EXPECT_EQ(0u, count_highest(counter, 10));
EXPECT_EQ(0u, counter.get_num_sorted());
// inserting alone never sorts anything
EXPECT_EQ(2, counter.insert(2, 2));
EXPECT_EQ(3, counter.insert(3, 3));
EXPECT_EQ(4, counter.insert(4, 4));
EXPECT_EQ(0u, counter.get_num_sorted());
// a query sorts exactly as many entries as requested, capped at the number
// of stored keys
EXPECT_EQ(0u, count_highest(counter, 0));
EXPECT_EQ(0u, counter.get_num_sorted());
EXPECT_EQ(1u, count_highest(counter, 1));
EXPECT_EQ(1u, counter.get_num_sorted());
EXPECT_EQ(2u, count_highest(counter, 2));
EXPECT_EQ(2u, counter.get_num_sorted());
EXPECT_EQ(3u, count_highest(counter, 10));
EXPECT_EQ(3u, counter.get_num_sorted());
EXPECT_EQ(1, counter.insert(1, 1)); // insert at bottom does not invalidate
EXPECT_EQ(3u, counter.get_num_sorted());
EXPECT_EQ(4u, count_highest(counter, 10));
EXPECT_EQ(4u, counter.get_num_sorted());
EXPECT_EQ(5, counter.insert(5, 5)); // insert at top invalidates sort
EXPECT_EQ(0u, counter.get_num_sorted());
// the sorted range grows again step by step with each larger request
EXPECT_EQ(0u, count_highest(counter, 0));
EXPECT_EQ(0u, counter.get_num_sorted());
EXPECT_EQ(1u, count_highest(counter, 1));
EXPECT_EQ(1u, counter.get_num_sorted());
EXPECT_EQ(2u, count_highest(counter, 2));
EXPECT_EQ(2u, counter.get_num_sorted());
EXPECT_EQ(3u, count_highest(counter, 3));
EXPECT_EQ(3u, counter.get_num_sorted());
EXPECT_EQ(4u, count_highest(counter, 4));
EXPECT_EQ(4u, counter.get_num_sorted());
EXPECT_EQ(5u, count_highest(counter, 10));
EXPECT_EQ(5u, counter.get_num_sorted());
// updating an existing counter only invalidates entries <= that counter
EXPECT_EQ(2, counter.insert(1)); // invalidates {1,2} and {2,2}
EXPECT_EQ(3u, counter.get_num_sorted());
EXPECT_EQ(5u, count_highest(counter, 10));
EXPECT_EQ(5u, counter.get_num_sorted());
counter.clear(); // invalidates sort
EXPECT_EQ(0u, counter.get_num_sorted());
}