mirror of
https://github.com/ceph/ceph
synced 2025-03-25 11:48:05 +00:00
common/bloom_filter: unit tests
Fun facts:
- fpp = false positive probability
- fpp is a function of insert count only
- at .1% fpp, we pay about 2 bytes per insert
- at 1-2% fpp, we pay about 1 byte per insert
- at 15% fpp, we pay about .5 bytes per insert

Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
parent
12aa53cc94
commit
f1584fb05c
@ -26,6 +26,7 @@
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -470,7 +471,7 @@ public:
|
||||
void encode(bufferlist& bl) const;
|
||||
void decode(bufferlist::iterator& bl);
|
||||
void dump(Formatter *f) const;
|
||||
static void generate_test_instances(list<bloom_filter*>& ls);
|
||||
static void generate_test_instances(std::list<bloom_filter*>& ls);
|
||||
};
|
||||
WRITE_CLASS_ENCODER(bloom_filter)
|
||||
|
||||
|
@ -258,6 +258,11 @@ unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
|
||||
unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
|
||||
check_PROGRAMS += unittest_addrs
|
||||
|
||||
unittest_bloom_filter_SOURCES = test/common/test_bloom_filter.cc
|
||||
unittest_bloom_filter_CXXFLAGS = $(UNITTEST_CXXFLAGS)
|
||||
unittest_bloom_filter_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
|
||||
check_PROGRAMS += unittest_bloom_filter
|
||||
|
||||
unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
|
||||
unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS)
|
||||
unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
|
||||
|
62
src/test/common/test_bloom_filter.cc
Normal file
62
src/test/common/test_bloom_filter.cc
Normal file
@ -0,0 +1,62 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
*
|
||||
* Copyright (C) 2013 Inktank <info@inktank.com>
|
||||
*
|
||||
* LGPL2.1 (see COPYING-LGPL2.1) or later
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "include/stringify.h"
|
||||
#include "common/bloom_filter.hpp"
|
||||
|
||||
// Smoke test: keys that were inserted into a filter must be reported as
// present by contains().
TEST(BloomFilter, Basic) {
  // Arguments mirror the Sweep test's (max, fpp, 1) call; the meaning of the
  // trailing 1 is not visible from this file -- presumably a seed, confirm
  // against bloom_filter.hpp.
  bloom_filter filter(10, .1, 1);

  // populate
  filter.insert("foo");
  filter.insert("bar");

  // both inserted keys have to be found again
  ASSERT_TRUE(filter.contains("foo"));
  ASSERT_TRUE(filter.contains("bar"));
}
|
||||
|
||||
// Sweeps the filter's target insert count and requested false positive
// probability (fpp).  For each combination it fills the filter, probes it with
// keys that were never inserted, prints one tab-separated row (max, requested
// fpp, measured hit rate, encoded size in bytes, encoded bytes per insert) and
// requires the measured rate to stay below 10x the requested fpp.
TEST(BloomFilter, Sweep) {
  std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
  for (int ex = 3; ex < 12; ex++) {
    // 2 << ex, i.e. 16 for ex == 3 up to 4096 for ex == 11; does not depend
    // on fpp, so compute it once per outer iteration.
    const int max = 2 << ex;

    for (float fpp = .001; fpp < .5; fpp *= 2.0) {
      // NOTE(review): the trailing 1 is presumably a seed -- confirm against
      // bloom_filter.hpp.
      bloom_filter bf(max, fpp, 1);
      bf.insert("foo");
      bf.insert("bar");

      ASSERT_TRUE(bf.contains("foo"));
      ASSERT_TRUE(bf.contains("bar"));

      // fill the filter with max additional keys
      for (int n = 0; n < max; n++) {
        bf.insert("ok" + stringify(n));
      }

      // probe with keys that were never inserted; each hit is a false positive
      const int test = max * 100;
      int hit = 0;
      for (int n = 0; n < test; n++) {
        if (bf.contains("asdf" + stringify(n))) {
          hit++;
        }
      }

      // the early inserts must still be found after the filter was filled
      ASSERT_TRUE(bf.contains("foo"));
      ASSERT_TRUE(bf.contains("bar"));

      const double actual = static_cast<double>(hit) / static_cast<double>(test);

      // the encoded length is used as the size figure for this filter
      bufferlist bl;
      ::encode(bf, bl);

      const double byte_per_insert =
        static_cast<double>(bl.length()) / static_cast<double>(max);

      std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;

      // ASSERT_LT reports both operands when the bound is violated
      ASSERT_LT(actual, fpp * 10);
    }
  }
}
|
Loading…
Reference in New Issue
Block a user