common/bloom_filter: unit tests

Fun facts:

- fpp = false positive probability
- fpp is a function of insert count only
- at .1% fpp, we pay about 2 bytes per insert
- at 1-2% fpp, we pay about 1 byte per insert
- at 15% fpp, we pay about .5 bytes per insert

Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
Sage Weil 2013-09-19 17:57:14 -07:00
parent 12aa53cc94
commit f1584fb05c
3 changed files with 69 additions and 1 deletions

View File

@ -26,6 +26,7 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include <list>
#include <string>
#include <vector>
@ -470,7 +471,7 @@ public:
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
void dump(Formatter *f) const;
static void generate_test_instances(list<bloom_filter*>& ls);
static void generate_test_instances(std::list<bloom_filter*>& ls);
};
WRITE_CLASS_ENCODER(bloom_filter)

View File

@ -258,6 +258,11 @@ unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
check_PROGRAMS += unittest_addrs
unittest_bloom_filter_SOURCES = test/common/test_bloom_filter.cc
unittest_bloom_filter_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_bloom_filter_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)
check_PROGRAMS += unittest_bloom_filter
unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc
unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS)
unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL)

View File

@ -0,0 +1,62 @@
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2013 Inktank <info@inktank.com>
*
* LGPL2.1 (see COPYING-LGPL2.1) or later
*/
#include <iostream>
#include <gtest/gtest.h>
#include "include/stringify.h"
#include "common/bloom_filter.hpp"
TEST(BloomFilter, Basic) {
bloom_filter bf(10, .1, 1);
bf.insert("foo");
bf.insert("bar");
ASSERT_TRUE(bf.contains("foo"));
ASSERT_TRUE(bf.contains("bar"));
}
TEST(BloomFilter, Sweep) {
std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl;
for (int ex = 3; ex < 12; ex++) {
for (float fpp = .001; fpp < .5; fpp *= 2.0) {
int max = 2 << ex;
bloom_filter bf(max, fpp, 1);
bf.insert("foo");
bf.insert("bar");
ASSERT_TRUE(bf.contains("foo"));
ASSERT_TRUE(bf.contains("bar"));
for (int n = 0; n < max; n++)
bf.insert("ok" + stringify(n));
int test = max * 100;
int hit = 0;
for (int n = 0; n < test; n++)
if (bf.contains("asdf" + stringify(n)))
hit++;
ASSERT_TRUE(bf.contains("foo"));
ASSERT_TRUE(bf.contains("bar"));
double actual = (double)hit / (double)test;
bufferlist bl;
::encode(bf, bl);
double byte_per_insert = (double)bl.length() / (double)max;
std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl;
ASSERT_TRUE(actual < fpp * 10);
}
}
}