Drop de-duplication of heap sample (aka heapz) entries.

pprof can aggregate them, and it can do it far better than we can, with proper unsampling and so on.
Aliaksey Kandratsenka 2018-08-27 20:10:09 -07:00
parent 954f9dc0e3
commit 63a12a5ed3
5 changed files with 56 additions and 115 deletions
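The aggregation the commit message defers to pprof is straightforward on the consumer side. The sketch below is illustrative only, neither gperftools nor pprof code; the record layout, helper names, and the rate parameter are assumptions. The idea: identical stacks can simply be summed after the dump, and each sampled record can be scaled by the probability that an allocation of its average size was sampled at all, which is the usual heap-sample unsampling.

#include <cmath>
#include <cstdint>
#include <map>
#include <vector>

// One dumped heap-sample record: how many times this stack was sampled,
// the total sampled bytes, and the stack itself.
struct SampleRecord {
  uint64_t count;
  uint64_t bytes;
  std::vector<uintptr_t> stack;
};

struct Estimate { double count; double bytes; };

// Scale a sampled record to an estimate of actual heap usage. 'rate' stands in
// for the sampling interval; an allocation of average size s is sampled with
// probability roughly 1 - exp(-s / rate).
Estimate Unsample(const SampleRecord& r, double rate) {
  double avg = static_cast<double>(r.bytes) / static_cast<double>(r.count);
  double p = 1.0 - std::exp(-avg / rate);
  return {r.count / p, r.bytes / p};
}

// De-duplicate by stack after the fact, the way a profile consumer can.
std::map<std::vector<uintptr_t>, Estimate> Aggregate(
    const std::vector<SampleRecord>& records, double rate) {
  std::map<std::vector<uintptr_t>, Estimate> out;
  for (const SampleRecord& r : records) {
    Estimate e = Unsample(r, rate);
    out[r.stack].count += e.count;
    out[r.stack].bytes += e.bytes;
  }
  return out;
}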

src/stack_trace_table.cc

@@ -42,27 +42,15 @@
 namespace tcmalloc {
-bool StackTraceTable::Bucket::KeyEqual(uintptr_t h,
-                                       const StackTrace& t) const {
-  const bool eq = (this->hash == h && this->trace.depth == t.depth);
-  for (int i = 0; eq && i < t.depth; ++i) {
-    if (this->trace.stack[i] != t.stack[i]) {
-      return false;
-    }
-  }
-  return eq;
-}
 StackTraceTable::StackTraceTable()
     : error_(false),
       depth_total_(0),
       bucket_total_(0),
-      table_(new Bucket*[kHashTableSize]()) {
-  memset(table_, 0, kHashTableSize * sizeof(Bucket*));
+      head_(nullptr) {
 }
 StackTraceTable::~StackTraceTable() {
-  delete[] table_;
+  ASSERT(head_ == nullptr);
 }
 void StackTraceTable::AddTrace(const StackTrace& t) {
@@ -70,89 +58,64 @@ void StackTraceTable::AddTrace(const StackTrace& t) {
     return;
   }
-  // Hash function borrowed from base/heap-profile-table.cc
-  uintptr_t h = 0;
-  for (int i = 0; i < t.depth; ++i) {
-    h += reinterpret_cast<uintptr_t>(t.stack[i]);
-    h += h << 10;
-    h ^= h >> 6;
-  }
-  h += h << 3;
-  h ^= h >> 11;
-  const int idx = h % kHashTableSize;
-  Bucket* b = table_[idx];
-  while (b != NULL && !b->KeyEqual(h, t)) {
-    b = b->next;
-  }
-  if (b != NULL) {
-    b->count++;
-    b->trace.size += t.size;  // keep cumulative size
-  } else {
   depth_total_ += t.depth;
   bucket_total_++;
-  b = Static::bucket_allocator()->New();
-  if (b == NULL) {
+  Entry* entry = allocator_.allocate(1);
+  if (entry == nullptr) {
     Log(kLog, __FILE__, __LINE__,
-        "tcmalloc: could not allocate bucket", sizeof(*b));
+        "tcmalloc: could not allocate bucket", sizeof(*entry));
     error_ = true;
   } else {
-    b->hash = h;
-    b->trace = t;
-    b->count = 1;
-    b->next = table_[idx];
-    table_[idx] = b;
-  }
+    entry->trace = t;
+    entry->next = head_;
+    head_ = entry;
   }
 }
 void** StackTraceTable::ReadStackTracesAndClear() {
-  if (error_) {
-    return NULL;
-  }
+  void** out = nullptr;
-  // Allocate output array
   const int out_len = bucket_total_ * 3 + depth_total_ + 1;
-  void** out = new void*[out_len];
-  if (out == NULL) {
+  if (!error_) {
+    // Allocate output array
+    out = new (std::nothrow_t{}) void*[out_len];
+    if (out == nullptr) {
       Log(kLog, __FILE__, __LINE__,
           "tcmalloc: allocation failed for stack traces",
          out_len * sizeof(*out));
-    return NULL;
     }
+  }
+  if (out) {
     // Fill output array
     int idx = 0;
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count));
-      out[idx++] = reinterpret_cast<void*>(b->trace.size);  // cumulative size
-      out[idx++] = reinterpret_cast<void*>(b->trace.depth);
-      for (int d = 0; d < b->trace.depth; ++d) {
-        out[idx++] = b->trace.stack[d];
-      }
-      b = b->next;
+    Entry* entry = head_;
+    while (entry != NULL) {
+      out[idx++] = reinterpret_cast<void*>(uintptr_t{1});  // count
+      out[idx++] = reinterpret_cast<void*>(entry->trace.size);  // cumulative size
+      out[idx++] = reinterpret_cast<void*>(entry->trace.depth);
+      for (int d = 0; d < entry->trace.depth; ++d) {
+        out[idx++] = entry->trace.stack[d];
+      }
+      entry = entry->next;
     }
     out[idx++] = NULL;
     ASSERT(idx == out_len);
   }
   // Clear state
   error_ = false;
   depth_total_ = 0;
   bucket_total_ = 0;
   SpinLockHolder h(Static::pageheap_lock());
-  for (int i = 0; i < kHashTableSize; ++i) {
-    Bucket* b = table_[i];
-    while (b != NULL) {
-      Bucket* next = b->next;
-      Static::bucket_allocator()->Delete(b);
-      b = next;
-    }
-    table_[i] = NULL;
+  Entry* entry = head_;
+  while (entry != nullptr) {
+    Entry* next = entry->next;
+    allocator_.deallocate(entry, 1);
+    entry = next;
   }
+  head_ = nullptr;
   return out;
 }
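The dump format itself is unchanged: each entry is a count, a cumulative size, a depth, and then depth program counters, with a NULL in the count position terminating the list. The only behavioral difference is that the count is now always 1 and duplicate stacks appear as separate entries. A hypothetical consumer (illustration only; PrintSamples is not a gperftools API) could walk the array returned by ReadStackTracesAndClear like this:

#include <cstdint>
#include <cstdio>

// Walks the flat array produced by StackTraceTable::ReadStackTracesAndClear():
// [count, size, depth, pc0 .. pc(depth-1)] repeated, terminated by a NULL count.
void PrintSamples(void** out) {
  int idx = 0;
  while (out[idx] != nullptr) {
    uintptr_t count = reinterpret_cast<uintptr_t>(out[idx++]);
    uintptr_t size = reinterpret_cast<uintptr_t>(out[idx++]);
    uintptr_t depth = reinterpret_cast<uintptr_t>(out[idx++]);
    std::printf("count=%llu size=%llu depth=%llu:",
                (unsigned long long)count, (unsigned long long)size,
                (unsigned long long)depth);
    for (uintptr_t d = 0; d < depth; ++d) {
      std::printf(" %p", out[idx++]);
    }
    std::printf("\n");
  }
}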

src/stack_trace_table.h

@@ -41,6 +41,7 @@
 #include <stdint.h>  // for uintptr_t
 #endif
 #include "common.h"
+#include "page_heap_allocator.h"
 namespace tcmalloc {
@@ -62,29 +63,21 @@ class PERFTOOLS_DLL_DECL StackTraceTable {
   void** ReadStackTracesAndClear();
-  // Exposed for PageHeapAllocator
-  struct Bucket {
-    // Key
-    uintptr_t hash;
-    StackTrace trace;
-    // Payload
-    int count;
-    Bucket* next;
-    bool KeyEqual(uintptr_t h, const StackTrace& t) const;
-  };
   // For testing
   int depth_total() const { return depth_total_; }
   int bucket_total() const { return bucket_total_; }
  private:
-  static const int kHashTableSize = 1 << 14;  // => table_ is 128k
+  struct Entry {
+    Entry* next;
+    StackTrace trace;
+  };
   bool error_;
   int depth_total_;
   int bucket_total_;
-  Bucket** table_;
+  Entry* head_;
+  STLPageHeapAllocator<Entry, void> allocator_;
 };
 }  // namespace tcmalloc
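In other words, the per-bucket hash table becomes a plain singly linked list of Entry records drawn from an STL-style allocator. Below is a standalone sketch of that push/drain pattern; the helper names (Push, DrainAll), the Trace layout, and the use of std::allocator are made up for illustration, whereas the real class uses STLPageHeapAllocator, which serves memory from tcmalloc's internal arenas.

#include <cstdint>
#include <memory>
#include <new>

constexpr int kMaxDepth = 31;  // stand-in for tcmalloc's stack-depth limit

struct Trace {
  uintptr_t size;
  int depth;
  void* stack[kMaxDepth];
};

struct Entry {
  Entry* next;
  Trace trace;
};

// Record one sample in O(1): allocate raw storage, construct in place, link at the head.
template <typename Alloc>
Entry* Push(Entry* head, const Trace& t, Alloc& alloc) {
  Entry* e = alloc.allocate(1);
  new (e) Entry{head, t};
  return e;
}

// Drop every entry once the profile has been read out.
template <typename Alloc>
void DrainAll(Entry*& head, Alloc& alloc) {
  while (head != nullptr) {
    Entry* next = head->next;
    head->~Entry();  // trivial destructor, kept for symmetry
    alloc.deallocate(head, 1);
    head = next;
  }
}

// Usage with the standard allocator:
//   std::allocator<Entry> alloc;
//   Entry* head = nullptr;
//   head = Push(head, some_trace, alloc);
//   DrainAll(head, alloc);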

src/static_vars.cc

@@ -74,7 +74,6 @@ CentralFreeListPadded Static::central_cache_[kClassSizesMax];
 PageHeapAllocator<Span> Static::span_allocator_;
 PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
 Span Static::sampled_objects_;
-PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_;
 StackTrace* Static::growth_stacks_ = NULL;
 Static::PageHeapStorage Static::pageheap_;
@@ -84,7 +83,6 @@ void Static::InitStaticVars() {
   span_allocator_.New(); // Reduce cache conflicts
   span_allocator_.New(); // Reduce cache conflicts
   stacktrace_allocator_.Init();
-  bucket_allocator_.Init();
   // Do a bit of sanitizing: make sure central_cache is aligned properly
   CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0);
   for (int i = 0; i < num_size_classes(); ++i) {

src/static_vars.h

@@ -83,9 +83,6 @@ class Static {
   // State kept for sampled allocations (/pprof/heap support)
   static Span* sampled_objects() { return &sampled_objects_; }
-  static PageHeapAllocator<StackTraceTable::Bucket>* bucket_allocator() {
-    return &bucket_allocator_;
-  }
   // Check if InitStaticVars() has been run.
   static bool IsInited() { return inited_; }
@@ -107,7 +104,6 @@
   ATTRIBUTE_HIDDEN static PageHeapAllocator<Span> span_allocator_;
   ATTRIBUTE_HIDDEN static PageHeapAllocator<StackTrace> stacktrace_allocator_;
   ATTRIBUTE_HIDDEN static Span sampled_objects_;
-  ATTRIBUTE_HIDDEN static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_;
   // Linked list of stack traces recorded every time we allocated memory
   // from the system. Useful for finding allocation sites that cause

src/tests/stack_trace_table_test.cc

@@ -70,18 +70,10 @@ int main(int argc, char **argv) {
   AddTrace(&table, t2);
   CHECK_EQ(table.depth_total(), 4);
   CHECK_EQ(table.bucket_total(), 2);
-  static const uintptr_t k3[] = {1, 1024, 2, 1, 2, 1, 512, 2, 2, 1, 0};
+  static const uintptr_t k3[] = {1, 512, 2, 2, 1, 1, 1024, 2, 1, 2, 0};
   CheckTracesAndReset(&table, k3, ARRAYSIZE(k3));
-  // Table w/ 2 x t1, 1 x t2
-  AddTrace(&table, t1);
-  AddTrace(&table, t2);
-  AddTrace(&table, t1);
-  CHECK_EQ(table.depth_total(), 4);
-  CHECK_EQ(table.bucket_total(), 2);
-  static const uintptr_t k4[] = {2, 2048, 2, 1, 2, 1, 512, 2, 2, 1, 0};
-  CheckTracesAndReset(&table, k4, ARRAYSIZE(k4));
-  // Table w/ t1, t3
   // Same stack as t1, but w/ different size
   tcmalloc::StackTrace t3;
   t3.size = static_cast<uintptr_t>(2);
@@ -89,12 +81,11 @@ int main(int argc, char **argv) {
   t3.stack[0] = reinterpret_cast<void*>(1);
   t3.stack[1] = reinterpret_cast<void*>(2);
+  // Table w/ t1, t3
   AddTrace(&table, t1);
   AddTrace(&table, t3);
-  CHECK_EQ(table.depth_total(), 2);
-  CHECK_EQ(table.bucket_total(), 1);
-  static const uintptr_t k5[] = {2, 1026, 2, 1, 2, 0};
+  CHECK_EQ(table.depth_total(), 4);
+  CHECK_EQ(table.bucket_total(), 2);
+  static const uintptr_t k5[] = {1, 2, 2, 1, 2, 1, 1024, 2, 1, 2, 0};
   CheckTracesAndReset(&table, k5, ARRAYSIZE(k5));
   puts("PASS");
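For readers decoding the new expected arrays: entries come out in reverse insertion order, with one record per AddTrace call and a count that is always 1. A breakdown of the updated k3 from the test above (the grouping comments are mine, not part of the test):

// k3 after this change, for AddTrace(t1) then AddTrace(t2):
//   1, 512, 2,  2, 1,   // t2: count=1, size=512,  depth=2, stack={2, 1}  (added last, dumped first)
//   1, 1024, 2, 1, 2,   // t1: count=1, size=1024, depth=2, stack={1, 2}
//   0                   // NULL terminator
// Previously the same traces were de-duplicated into hash-table buckets, so the old k3
// listed t1 first and the old k4/k5 carried summed counts and sizes (e.g. {2, 2048, ...}).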