mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
This is a portion of Part 1 of the namespace project: using ADL properly in encode and decode so we can use namespaces easily in Ceph. Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
454 lines
12 KiB
C++
454 lines
12 KiB
C++
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
|
// vim: ts=8 sw=2 smarttab
|
|
/*
|
|
* Ceph - scalable distributed file system
|
|
*
|
|
* Copyright (C) 2013 Inktank <info@inktank.com>
|
|
*
|
|
* This is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software
|
|
* Foundation. See file COPYING.
|
|
*
|
|
*/
|
|
|
|
#ifndef CEPH_OSD_HITSET_H
|
|
#define CEPH_OSD_HITSET_H
|
|
|
|
#include <boost/scoped_ptr.hpp>
|
|
|
|
#include "include/encoding.h"
|
|
#include "include/unordered_set.h"
|
|
#include "common/bloom_filter.hpp"
|
|
#include "common/hobject.h"
|
|
|
|
/**
|
|
* generic container for a HitSet
|
|
*
|
|
* Encapsulate a HitSetImpl of any type. Expose a generic interface
|
|
* to users and wrap the encoded object with a type so that it can be
|
|
* safely decoded later.
|
|
*/
|
|
|
|
class HitSet {
|
|
public:
|
|
typedef enum {
|
|
TYPE_NONE = 0,
|
|
TYPE_EXPLICIT_HASH = 1,
|
|
TYPE_EXPLICIT_OBJECT = 2,
|
|
TYPE_BLOOM = 3
|
|
} impl_type_t;
|
|
|
|
static const char *get_type_name(impl_type_t t) {
|
|
switch (t) {
|
|
case TYPE_NONE: return "none";
|
|
case TYPE_EXPLICIT_HASH: return "explicit_hash";
|
|
case TYPE_EXPLICIT_OBJECT: return "explicit_object";
|
|
case TYPE_BLOOM: return "bloom";
|
|
default: return "???";
|
|
}
|
|
}
|
|
const char *get_type_name() const {
|
|
if (impl)
|
|
return get_type_name(impl->get_type());
|
|
return get_type_name(TYPE_NONE);
|
|
}
|
|
|
|
/// abstract interface for a HitSet implementation
|
|
class Impl {
|
|
public:
|
|
virtual impl_type_t get_type() const = 0;
|
|
virtual bool is_full() const = 0;
|
|
virtual void insert(const hobject_t& o) = 0;
|
|
virtual bool contains(const hobject_t& o) const = 0;
|
|
virtual unsigned insert_count() const = 0;
|
|
virtual unsigned approx_unique_insert_count() const = 0;
|
|
virtual void encode(bufferlist &bl) const = 0;
|
|
virtual void decode(bufferlist::iterator& p) = 0;
|
|
virtual void dump(Formatter *f) const = 0;
|
|
virtual Impl* clone() const = 0;
|
|
virtual void seal() {}
|
|
virtual ~Impl() {}
|
|
};
|
|
|
|
boost::scoped_ptr<Impl> impl;
|
|
bool sealed;
|
|
|
|
class Params {
|
|
/// create an Impl* of the given type
|
|
bool create_impl(impl_type_t t);
|
|
|
|
public:
|
|
class Impl {
|
|
public:
|
|
virtual impl_type_t get_type() const = 0;
|
|
virtual HitSet::Impl *get_new_impl() const = 0;
|
|
virtual void encode(bufferlist &bl) const {}
|
|
virtual void decode(bufferlist::iterator& p) {}
|
|
virtual void dump(Formatter *f) const {}
|
|
virtual void dump_stream(ostream& o) const {}
|
|
virtual ~Impl() {}
|
|
};
|
|
|
|
Params() {}
|
|
explicit Params(Impl *i) : impl(i) {}
|
|
virtual ~Params() {}
|
|
|
|
boost::scoped_ptr<Params::Impl> impl;
|
|
|
|
impl_type_t get_type() const {
|
|
if (impl)
|
|
return impl->get_type();
|
|
return TYPE_NONE;
|
|
}
|
|
|
|
Params(const Params& o);
|
|
const Params& operator=(const Params& o);
|
|
|
|
void encode(bufferlist &bl) const;
|
|
void decode(bufferlist::iterator &bl);
|
|
void dump(Formatter *f) const;
|
|
static void generate_test_instances(list<HitSet::Params*>& o);
|
|
|
|
friend ostream& operator<<(ostream& out, const HitSet::Params& p);
|
|
};
|
|
|
|
HitSet() : impl(NULL), sealed(false) {}
|
|
explicit HitSet(Impl *i) : impl(i), sealed(false) {}
|
|
explicit HitSet(const HitSet::Params& params);
|
|
|
|
HitSet(const HitSet& o) {
|
|
sealed = o.sealed;
|
|
if (o.impl)
|
|
impl.reset(o.impl->clone());
|
|
else
|
|
impl.reset(NULL);
|
|
}
|
|
const HitSet& operator=(const HitSet& o) {
|
|
sealed = o.sealed;
|
|
if (o.impl)
|
|
impl.reset(o.impl->clone());
|
|
else
|
|
impl.reset(NULL);
|
|
return *this;
|
|
}
|
|
|
|
|
|
bool is_full() const {
|
|
return impl->is_full();
|
|
}
|
|
/// insert a hash into the set
|
|
void insert(const hobject_t& o) {
|
|
impl->insert(o);
|
|
}
|
|
/// query whether a hash is in the set
|
|
bool contains(const hobject_t& o) const {
|
|
return impl->contains(o);
|
|
}
|
|
|
|
unsigned insert_count() const {
|
|
return impl->insert_count();
|
|
}
|
|
unsigned approx_unique_insert_count() const {
|
|
return impl->approx_unique_insert_count();
|
|
}
|
|
void seal() {
|
|
assert(!sealed);
|
|
sealed = true;
|
|
impl->seal();
|
|
}
|
|
|
|
void encode(bufferlist &bl) const;
|
|
void decode(bufferlist::iterator &bl);
|
|
void dump(Formatter *f) const;
|
|
static void generate_test_instances(list<HitSet*>& o);
|
|
|
|
private:
|
|
void reset_to_type(impl_type_t type);
|
|
};
|
|
WRITE_CLASS_ENCODER(HitSet)
|
|
WRITE_CLASS_ENCODER(HitSet::Params)
|
|
|
|
typedef boost::shared_ptr<HitSet> HitSetRef;
|
|
|
|
ostream& operator<<(ostream& out, const HitSet::Params& p);
|
|
|
|
/**
|
|
* explicitly enumerate hash hits in the set
|
|
*/
|
|
class ExplicitHashHitSet : public HitSet::Impl {
|
|
uint64_t count;
|
|
ceph::unordered_set<uint32_t> hits;
|
|
public:
|
|
class Params : public HitSet::Params::Impl {
|
|
public:
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_EXPLICIT_HASH;
|
|
}
|
|
HitSet::Impl *get_new_impl() const override {
|
|
return new ExplicitHashHitSet;
|
|
}
|
|
static void generate_test_instances(list<Params*>& o) {
|
|
o.push_back(new Params);
|
|
}
|
|
};
|
|
|
|
ExplicitHashHitSet() : count(0) {}
|
|
explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
|
|
ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count),
|
|
hits(o.hits) {}
|
|
|
|
HitSet::Impl *clone() const override {
|
|
return new ExplicitHashHitSet(*this);
|
|
}
|
|
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_EXPLICIT_HASH;
|
|
}
|
|
bool is_full() const override {
|
|
return false;
|
|
}
|
|
void insert(const hobject_t& o) override {
|
|
hits.insert(o.get_hash());
|
|
++count;
|
|
}
|
|
bool contains(const hobject_t& o) const override {
|
|
return hits.count(o.get_hash());
|
|
}
|
|
unsigned insert_count() const override {
|
|
return count;
|
|
}
|
|
unsigned approx_unique_insert_count() const override {
|
|
return hits.size();
|
|
}
|
|
void encode(bufferlist &bl) const override {
|
|
ENCODE_START(1, 1, bl);
|
|
encode(count, bl);
|
|
encode(hits, bl);
|
|
ENCODE_FINISH(bl);
|
|
}
|
|
void decode(bufferlist::iterator &bl) override {
|
|
DECODE_START(1, bl);
|
|
decode(count, bl);
|
|
decode(hits, bl);
|
|
DECODE_FINISH(bl);
|
|
}
|
|
void dump(Formatter *f) const override;
|
|
static void generate_test_instances(list<ExplicitHashHitSet*>& o) {
|
|
o.push_back(new ExplicitHashHitSet);
|
|
o.push_back(new ExplicitHashHitSet);
|
|
o.back()->insert(hobject_t());
|
|
o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
|
|
o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
|
|
}
|
|
};
|
|
WRITE_CLASS_ENCODER(ExplicitHashHitSet)
|
|
|
|
/**
|
|
* explicitly enumerate objects in the set
|
|
*/
|
|
class ExplicitObjectHitSet : public HitSet::Impl {
|
|
uint64_t count;
|
|
ceph::unordered_set<hobject_t> hits;
|
|
public:
|
|
class Params : public HitSet::Params::Impl {
|
|
public:
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_EXPLICIT_OBJECT;
|
|
}
|
|
HitSet::Impl *get_new_impl() const override {
|
|
return new ExplicitObjectHitSet;
|
|
}
|
|
static void generate_test_instances(list<Params*>& o) {
|
|
o.push_back(new Params);
|
|
}
|
|
};
|
|
|
|
ExplicitObjectHitSet() : count(0) {}
|
|
explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
|
|
ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count),
|
|
hits(o.hits) {}
|
|
|
|
HitSet::Impl *clone() const override {
|
|
return new ExplicitObjectHitSet(*this);
|
|
}
|
|
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_EXPLICIT_OBJECT;
|
|
}
|
|
bool is_full() const override {
|
|
return false;
|
|
}
|
|
void insert(const hobject_t& o) override {
|
|
hits.insert(o);
|
|
++count;
|
|
}
|
|
bool contains(const hobject_t& o) const override {
|
|
return hits.count(o);
|
|
}
|
|
unsigned insert_count() const override {
|
|
return count;
|
|
}
|
|
unsigned approx_unique_insert_count() const override {
|
|
return hits.size();
|
|
}
|
|
void encode(bufferlist &bl) const override {
|
|
ENCODE_START(1, 1, bl);
|
|
encode(count, bl);
|
|
encode(hits, bl);
|
|
ENCODE_FINISH(bl);
|
|
}
|
|
void decode(bufferlist::iterator &bl) override {
|
|
DECODE_START(1, bl);
|
|
decode(count, bl);
|
|
decode(hits, bl);
|
|
DECODE_FINISH(bl);
|
|
}
|
|
void dump(Formatter *f) const override;
|
|
static void generate_test_instances(list<ExplicitObjectHitSet*>& o) {
|
|
o.push_back(new ExplicitObjectHitSet);
|
|
o.push_back(new ExplicitObjectHitSet);
|
|
o.back()->insert(hobject_t());
|
|
o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
|
|
o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
|
|
}
|
|
};
|
|
WRITE_CLASS_ENCODER(ExplicitObjectHitSet)
|
|
|
|
/**
|
|
* use a bloom_filter to track hits to the set
|
|
*/
|
|
class BloomHitSet : public HitSet::Impl {
|
|
compressible_bloom_filter bloom;
|
|
|
|
public:
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_BLOOM;
|
|
}
|
|
|
|
class Params : public HitSet::Params::Impl {
|
|
public:
|
|
HitSet::impl_type_t get_type() const override {
|
|
return HitSet::TYPE_BLOOM;
|
|
}
|
|
HitSet::Impl *get_new_impl() const override {
|
|
return new BloomHitSet;
|
|
}
|
|
|
|
uint32_t fpp_micro; ///< false positive probability / 1M
|
|
uint64_t target_size; ///< number of unique insertions we expect to this HitSet
|
|
uint64_t seed; ///< seed to use when initializing the bloom filter
|
|
|
|
Params()
|
|
: fpp_micro(0), target_size(0), seed(0) {}
|
|
Params(double fpp, uint64_t t, uint64_t s)
|
|
: fpp_micro(fpp * 1000000.0), target_size(t), seed(s) {}
|
|
Params(const Params &o)
|
|
: fpp_micro(o.fpp_micro),
|
|
target_size(o.target_size),
|
|
seed(o.seed) {}
|
|
~Params() override {}
|
|
|
|
double get_fpp() const {
|
|
return (double)fpp_micro / 1000000.0;
|
|
}
|
|
void set_fpp(double f) {
|
|
fpp_micro = (unsigned)(llrintl(f * 1000000.0));
|
|
}
|
|
|
|
void encode(bufferlist& bl) const override {
|
|
ENCODE_START(1, 1, bl);
|
|
encode(fpp_micro, bl);
|
|
encode(target_size, bl);
|
|
encode(seed, bl);
|
|
ENCODE_FINISH(bl);
|
|
}
|
|
void decode(bufferlist::iterator& bl) override {
|
|
DECODE_START(1, bl);
|
|
decode(fpp_micro, bl);
|
|
decode(target_size, bl);
|
|
decode(seed, bl);
|
|
DECODE_FINISH(bl);
|
|
}
|
|
void dump(Formatter *f) const override;
|
|
void dump_stream(ostream& o) const override {
|
|
o << "false_positive_probability: "
|
|
<< get_fpp() << ", target_size: " << target_size
|
|
<< ", seed: " << seed;
|
|
}
|
|
static void generate_test_instances(list<Params*>& o) {
|
|
o.push_back(new Params);
|
|
o.push_back(new Params);
|
|
(*o.rbegin())->fpp_micro = 123456;
|
|
(*o.rbegin())->target_size = 300;
|
|
(*o.rbegin())->seed = 99;
|
|
}
|
|
};
|
|
|
|
BloomHitSet() {}
|
|
BloomHitSet(unsigned inserts, double fpp, int seed)
|
|
: bloom(inserts, fpp, seed)
|
|
{}
|
|
explicit BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
|
|
p->get_fpp(),
|
|
p->seed)
|
|
{}
|
|
|
|
BloomHitSet(const BloomHitSet &o) {
|
|
// oh god
|
|
bufferlist bl;
|
|
o.encode(bl);
|
|
bufferlist::iterator bli = bl.begin();
|
|
this->decode(bli);
|
|
}
|
|
|
|
HitSet::Impl *clone() const override {
|
|
return new BloomHitSet(*this);
|
|
}
|
|
|
|
bool is_full() const override {
|
|
return bloom.is_full();
|
|
}
|
|
|
|
void insert(const hobject_t& o) override {
|
|
bloom.insert(o.get_hash());
|
|
}
|
|
bool contains(const hobject_t& o) const override {
|
|
return bloom.contains(o.get_hash());
|
|
}
|
|
unsigned insert_count() const override {
|
|
return bloom.element_count();
|
|
}
|
|
unsigned approx_unique_insert_count() const override {
|
|
return bloom.approx_unique_element_count();
|
|
}
|
|
void seal() override {
|
|
// aim for a density of .5 (50% of bit set)
|
|
double pc = bloom.density() * 2.0;
|
|
if (pc < 1.0)
|
|
bloom.compress(pc);
|
|
}
|
|
|
|
void encode(bufferlist &bl) const override {
|
|
ENCODE_START(1, 1, bl);
|
|
encode(bloom, bl);
|
|
ENCODE_FINISH(bl);
|
|
}
|
|
void decode(bufferlist::iterator &bl) override {
|
|
DECODE_START(1, bl);
|
|
decode(bloom, bl);
|
|
DECODE_FINISH(bl);
|
|
}
|
|
void dump(Formatter *f) const override;
|
|
static void generate_test_instances(list<BloomHitSet*>& o) {
|
|
o.push_back(new BloomHitSet);
|
|
o.push_back(new BloomHitSet(10, .1, 1));
|
|
o.back()->insert(hobject_t());
|
|
o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
|
|
o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
|
|
}
|
|
};
|
|
WRITE_CLASS_ENCODER(BloomHitSet)
|
|
|
|
#endif
|