slightly faster size class cache

The lower bits of the page index are still used as the index into the
hash table. Those lower bits are zeroed, OR-ed with the size class, and
placed into the hash table. Checking is then just loading the value from
the hash table, XOR-ing it with the higher bits of the address, and
checking whether the resulting value is lower than 128. Notably, size
class 0 is no longer considered "invalid".
This commit is contained in:
Aliaksey Kandratsenka 2017-02-20 22:57:43 -08:00
parent b57c0bad41
commit 121b1cb32e
6 changed files with 104 additions and 103 deletions

View File

@ -340,7 +340,7 @@ void CentralFreeList::Populate() {
// (Instead of being eager, we could just replace any stale info
// about this span, but that seems to be no better in practice.)
for (int i = 0; i < npages; i++) {
Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
Static::pageheap()->SetCachedSizeClass(span->start + i, size_class_);
}
// Split the block into pieces and add to the free-list

View File

@ -118,6 +118,7 @@
#include <stdint.h> // for uintptr_t
#endif
#include "base/basictypes.h"
#include "common.h"
#include "internal_logging.h"
// A safe way of doing "(1 << n) - 1" -- without worrying about overflow
@ -128,12 +129,14 @@
// The types K and V provide upper bounds on the number of valid keys
// and values, but we explicitly require the keys to be less than
// 2^kKeybits and the values to be less than 2^kValuebits. The size of
// the table is controlled by kHashbits, and the type of each entry in
// the cache is T. See also the big comment at the top of the file.
template <int kKeybits, typename T>
// 2^kKeybits and the values to be less than 2^kValuebits. The size
// of the table is controlled by kHashbits, and the type of each entry
// in the cache is uintptr_t (native machine word). See also the big
// comment at the top of the file.
template <int kKeybits>
class PackedCache {
public:
typedef uintptr_t T;
typedef uintptr_t K;
typedef size_t V;
#ifdef TCMALLOC_SMALL_BUT_SLOW
@ -143,15 +146,36 @@ class PackedCache {
static const int kHashbits = 16;
#endif
static const int kValuebits = 7;
static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T);
// one bit after value bits
static const int kInvalidMask = 0x80;
explicit PackedCache(V initial_value) {
COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size);
COMPILE_ASSERT(kValuebits <= sizeof(V) * 8, value_size);
explicit PackedCache() {
COMPILE_ASSERT(kKeybits + kValuebits + 1 <= 8 * sizeof(T), use_whole_keys);
COMPILE_ASSERT(kHashbits <= kKeybits, hash_function);
COMPILE_ASSERT(kKeybits - kHashbits + kValuebits <= kTbits,
entry_size_must_be_big_enough);
Clear(initial_value);
COMPILE_ASSERT(kHashbits >= kValuebits + 1, small_values_space);
Clear();
}
bool TryGet(K key, V* out) const {
// As with other code in this class, we touch array_ as few times
// as we can. Assuming entries are read atomically then certain
// races are harmless.
ASSERT(key == (key & kKeyMask));
T hash = Hash(key);
T expected_entry = key;
expected_entry &= ~N_ONES_(T, kHashbits);
T entry = array_[hash];
entry ^= expected_entry;
if (PREDICT_FALSE(entry >= (1 << kValuebits))) {
return false;
}
*out = static_cast<V>(entry);
return true;
}
void Clear() {
// sets the 'invalid' bit in every byte, including the value byte
memset(const_cast<T* >(array_), kInvalidMask, sizeof(array_));
}
void Put(K key, V value) {
@ -160,72 +184,25 @@ class PackedCache {
array_[Hash(key)] = KeyToUpper(key) | value;
}
bool Has(K key) const {
void Invalidate(K key) {
ASSERT(key == (key & kKeyMask));
return KeyMatch(array_[Hash(key)], key);
}
V GetOrDefault(K key, V default_value) const {
// As with other code in this class, we touch array_ as few times
// as we can. Assuming entries are read atomically (e.g., their
// type is uintptr_t on most hardware) then certain races are
// harmless.
ASSERT(key == (key & kKeyMask));
T entry = array_[Hash(key)];
return KeyMatch(entry, key) ? EntryToValue(entry) : default_value;
}
void Clear(V value) {
ASSERT(value == (value & kValueMask));
for (int i = 0; i < 1 << kHashbits; i++) {
ASSERT(kUseWholeKeys || KeyToUpper(i) == 0);
array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value;
}
array_[Hash(key)] = KeyToUpper(key) | kInvalidMask;
}
private:
// We are going to pack a value and the upper part of a key (or a
// whole key) into an entry of type T. The UPPER type is for the
// upper part of a key, after the key has been masked and shifted
// for inclusion in an entry.
typedef T UPPER;
static V EntryToValue(T t) { return t & kValueMask; }
// If we have space for a whole key, we just shift it left.
// Otherwise kHashbits determines where in a K to find the upper
// part of the key, and kValuebits determines where in the entry to
// put it.
static UPPER KeyToUpper(K k) {
if (kUseWholeKeys) {
return static_cast<T>(k) << kValuebits;
} else {
const int shift = kHashbits - kValuebits;
// Assume kHashbits >= kValuebits. It'd be easy to lift this assumption.
return static_cast<T>(k >> shift) & kUpperMask;
}
// We just wipe all hash bits out of the key, i.e. clear the lower
// kHashbits. We rely on the compiler knowing the value of Hash(k).
static T KeyToUpper(K k) {
return static_cast<T>(k) ^ Hash(k);
}
static size_t Hash(K key) {
return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits);
static T Hash(K key) {
return static_cast<T>(key) & N_ONES_(size_t, kHashbits);
}
// Does the entry match the relevant part of the given key?
static bool KeyMatch(T entry, K key) {
return kUseWholeKeys ?
(entry >> kValuebits == key) :
((KeyToUpper(key) ^ entry) & kUpperMask) == 0;
}
static const int kTbits = 8 * sizeof(T);
static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits;
// For masking a K.
static const K kKeyMask = N_ONES_(K, kKeybits);
// For masking a T.
static const T kUpperMask = N_ONES_(T, kUpperbits) << kValuebits;
// For masking a V or a T.
static const V kValueMask = N_ONES_(V, kValuebits);

View File

@ -64,7 +64,6 @@ namespace tcmalloc {
PageHeap::PageHeap()
: pagemap_(MetaDataAlloc),
pagemap_cache_(0),
scavenge_counter_(0),
// Start scavenging at kMaxPages list
release_index_(kMaxPages),

View File

@ -83,7 +83,6 @@ namespace tcmalloc {
template <int BITS> class MapSelector {
public:
typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
typedef PackedCache<BITS-kPageShift, uint64_t> CacheType;
};
#ifndef TCMALLOC_SMALL_BUT_SLOW
@ -94,7 +93,6 @@ template <int BITS> class MapSelector {
template <> class MapSelector<48> {
public:
typedef TCMalloc_PageMap2<48-kPageShift> Type;
typedef PackedCache<48-kPageShift, uint64_t> CacheType;
};
#endif // TCMALLOC_SMALL_BUT_SLOW
@ -103,7 +101,6 @@ template <> class MapSelector<48> {
template <> class MapSelector<32> {
public:
typedef TCMalloc_PageMap2<32-kPageShift> Type;
typedef PackedCache<32-kPageShift, uint16_t> CacheType;
};
// -------------------------------------------------------------------------
@ -195,15 +192,22 @@ class PERFTOOLS_DLL_DECL PageHeap {
// smaller released and unreleased ranges.
Length ReleaseAtLeastNPages(Length num_pages);
// Return 0 if we have no information, or else the correct sizeclass for p.
// Reads and writes to pagemap_cache_ do not require locking.
// The entries are 64 bits on 64-bit hardware and 16 bits on
// 32-bit hardware, and we don't mind raciness as long as each read of
// an entry yields a valid entry, not a partially updated entry.
size_t GetSizeClassIfCached(PageID p) const {
return pagemap_cache_.GetOrDefault(p, 0);
bool TryGetSizeClass(PageID p, size_t* out) const {
return pagemap_cache_.TryGet(p, out);
}
void SetCachedSizeClass(PageID p, size_t cl) {
ASSERT(cl != 0);
pagemap_cache_.Put(p, cl);
}
void InvalidateCachedSizeClass(PageID p) { pagemap_cache_.Invalidate(p); }
size_t GetSizeClassOrZero(PageID p) const {
size_t cached_value;
if (!TryGetSizeClass(p, &cached_value)) {
cached_value = 0;
}
return cached_value;
}
void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
bool GetAggressiveDecommit(void) {return aggressive_decommit_;}
void SetAggressiveDecommit(bool aggressive_decommit) {
@ -235,9 +239,9 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Pick the appropriate map and cache types based on pointer size
typedef MapSelector<kAddressBits>::Type PageMap;
typedef MapSelector<kAddressBits>::CacheType PageMapCache;
PageMap pagemap_;
typedef PackedCache<kAddressBits - kPageShift> PageMapCache;
mutable PageMapCache pagemap_cache_;
PageMap pagemap_;
// We segregate spans of a given size into two circular linked
// lists: one for normal spans, and one for spans whose memory

View File

@ -821,8 +821,8 @@ class TCMallocImplementation : public MallocExtension {
if ((p >> (kAddressBits - kPageShift)) > 0) {
return kNotOwned;
}
size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
if (cl != 0) {
size_t cl;
if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
return kOwned;
}
const Span *span = Static::pageheap()->GetDescriptor(p);
@ -1054,9 +1054,11 @@ static TCMallocGuard module_enter_exit_hook;
static inline bool CheckCachedSizeClass(void *ptr) {
PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
return cached_value == 0 ||
cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
size_t cached_value;
if (!Static::pageheap()->TryGetSizeClass(p, &cached_value)) {
return true;
}
return cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
}
static inline void* CheckedMallocResult(void *result) {
@ -1065,7 +1067,7 @@ static inline void* CheckedMallocResult(void *result) {
}
static inline void* SpanToMallocResult(Span *span) {
Static::pageheap()->CacheSizeClass(span->start, 0);
Static::pageheap()->InvalidateCachedSizeClass(span->start);
return
CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
}
@ -1372,8 +1374,7 @@ ALWAYS_INLINE void do_free_helper(void* ptr,
goto non_zero;
}
cl = Static::pageheap()->GetSizeClassIfCached(p);
if (PREDICT_FALSE(cl == 0)) {
if (!Static::pageheap()->TryGetSizeClass(p, &cl)) {
span = Static::pageheap()->GetDescriptor(p);
if (PREDICT_FALSE(!span)) {
// span can be NULL because the pointer passed in is NULL or invalid
@ -1387,7 +1388,9 @@ ALWAYS_INLINE void do_free_helper(void* ptr,
return;
}
cl = span->sizeclass;
Static::pageheap()->CacheSizeClass(p, cl);
if (cl != 0) {
Static::pageheap()->SetCachedSizeClass(p, cl);
}
}
ASSERT(ptr != NULL);
@ -1445,8 +1448,8 @@ inline size_t GetSizeWithCallback(const void* ptr,
if (ptr == NULL)
return 0;
const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
if (cl != 0) {
size_t cl;
if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
return Static::sizemap()->ByteSizeForClass(cl);
}
@ -1456,7 +1459,6 @@ inline size_t GetSizeWithCallback(const void* ptr,
}
if (span->sizeclass != 0) {
Static::pageheap()->CacheSizeClass(p, span->sizeclass);
return Static::sizemap()->ByteSizeForClass(span->sizeclass);
}

View File

@ -35,24 +35,43 @@
#include "base/logging.h"
#include "packed-cache-inl.h"
static const int kHashbits = PackedCache<64, uint64>::kHashbits;
static const int kHashbits = PackedCache<20>::kHashbits;
template <int kKeybits>
static size_t MustGet(const PackedCache<kKeybits>& cache, uintptr_t key) {
size_t rv;
CHECK(cache.TryGet(key, &rv));
return rv;
}
template <int kKeybits>
static size_t Has(const PackedCache<kKeybits>& cache, uintptr_t key) {
size_t dummy;
return cache.TryGet(key, &dummy);
}
// A basic sanity test.
void PackedCacheTest_basic() {
PackedCache<32, uint32> cache(0);
CHECK_EQ(cache.GetOrDefault(0, 1), 0);
PackedCache<20> cache;
CHECK(!Has(cache, 0));
cache.Put(0, 17);
CHECK(cache.Has(0));
CHECK_EQ(cache.GetOrDefault(0, 1), 17);
CHECK(Has(cache, 0));
CHECK_EQ(MustGet(cache, 0), 17);
cache.Put(19, 99);
CHECK(cache.Has(0) && cache.Has(19));
CHECK_EQ(cache.GetOrDefault(0, 1), 17);
CHECK_EQ(cache.GetOrDefault(19, 1), 99);
CHECK_EQ(MustGet(cache, 0), 17);
CHECK_EQ(MustGet(cache, 19), 99);
// Knock <0, 17> out by using a conflicting key.
cache.Put(1 << kHashbits, 22);
CHECK(!cache.Has(0));
CHECK_EQ(cache.GetOrDefault(0, 1), 1);
CHECK_EQ(cache.GetOrDefault(1 << kHashbits, 1), 22);
CHECK(!Has(cache, 0));
CHECK_EQ(MustGet(cache, 1 << kHashbits), 22);
cache.Invalidate(19);
CHECK(!Has(cache, 19));
CHECK(!Has(cache, 0));
CHECK(Has(cache, 1 << kHashbits));
}
int main(int argc, char **argv) {