diff --git a/src/common.h b/src/common.h
index 14d7c29..a5801b7 100644
--- a/src/common.h
+++ b/src/common.h
@@ -196,6 +196,21 @@ class SizeMap {
     return (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
   }
 
+  // If size is no more than kMaxSize, compute index of the
+  // class_array[] entry for it, putting the class index in output
+  // parameter idx and returning true. Otherwise return false.
+  static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
+                                                             uint32* idx) {
+    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
+      *idx = (static_cast<uint32>(s) + 7) >> 3;
+      return true;
+    } else if (s <= kMaxSize) {
+      *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
+      return true;
+    }
+    return false;
+  }
+
   // Compute index of the class_array[] entry for a given size
   static inline size_t ClassIndex(size_t s) {
     // Use unsigned arithmetic to avoid unnecessary sign extensions.
@@ -237,31 +252,30 @@ class SizeMap {
     return class_array_[ClassIndex(size)];
   }
 
-  inline bool MaybeSizeClass(size_t size, size_t *size_class) {
-    size_t class_idx;
-    if (PREDICT_TRUE(size <= kMaxSmallSize)) {
-      class_idx = SmallSizeClass(size);
-    } else if (size <= kMaxSize) {
-      class_idx = LargeSizeClass(size);
-    } else {
+  // Check if size is small enough to be representable by a size
+  // class, and if it is, put matching size class into *cl. Returns
+  // true iff matching size class was found.
+  inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
+    uint32 idx;
+    if (!ClassIndexMaybe(size, &idx)) {
       return false;
     }
-    *size_class = class_array_[class_idx];
+    *cl = class_array_[idx];
     return true;
   }
 
   // Get the byte-size for a specified class
-  inline int32 ByteSizeForClass(size_t cl) {
+  inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
    return class_to_size_[cl];
  }
 
   // Mapping from size class to max size storable in that class
-  inline int32 class_to_size(size_t cl) {
+  inline int32 class_to_size(uint32 cl) {
     return class_to_size_[cl];
   }
 
   // Mapping from size class to number of pages to allocate at a time
-  inline size_t class_to_pages(size_t cl) {
+  inline size_t class_to_pages(uint32 cl) {
     return class_to_pages_[cl];
   }
 
@@ -270,7 +284,7 @@ class SizeMap {
   // The number of objects to move between a per-thread list and a central
   // list in one shot. We want this to be not too small so we can
   // amortize the lock overhead for accessing the central list. Making
   // it too big may temporarily cause unnecessary memory wastage in the
   // per-thread free list until the scavenger cleans up the list.
-  inline int num_objects_to_move(size_t cl) {
+  inline int num_objects_to_move(uint32 cl) {
     return num_objects_to_move_[cl];
   }
 };
diff --git a/src/packed-cache-inl.h b/src/packed-cache-inl.h
index 03a871c..7c216e5 100644
--- a/src/packed-cache-inl.h
+++ b/src/packed-cache-inl.h
@@ -138,7 +138,7 @@ class PackedCache {
  public:
   typedef uintptr_t T;
   typedef uintptr_t K;
-  typedef size_t V;
+  typedef uint32 V;
 #ifdef TCMALLOC_SMALL_BUT_SLOW
   // Decrease the size map cache if running in the small memory mode.
   static const int kHashbits = 12;
diff --git a/src/page_heap.cc b/src/page_heap.cc
index 50b2752..b92d9ed 100644
--- a/src/page_heap.cc
+++ b/src/page_heap.cc
@@ -513,7 +513,7 @@ bool PageHeap::EnsureLimit(Length n, bool withRelease)
   return takenPages + n <= limit;
 }
 
-void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+void PageHeap::RegisterSizeClass(Span* span, uint32 sc) {
   // Associate span object with all interior pages as well
   ASSERT(span->location == Span::IN_USE);
   ASSERT(GetDescriptor(span->start) == span);
diff --git a/src/page_heap.h b/src/page_heap.h
index 193bc97..eeb7cd6 100644
--- a/src/page_heap.h
+++ b/src/page_heap.h
@@ -129,7 +129,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
   // specified size-class.
   // REQUIRES: span was returned by an earlier call to New()
   //           and has not yet been deleted.
-  void RegisterSizeClass(Span* span, size_t sc);
+  void RegisterSizeClass(Span* span, uint32 sc);
 
   // Split an allocated span into two spans: one of length "n" pages
   // followed by another span of length "span->length - n" pages.
@@ -194,16 +194,16 @@ class PERFTOOLS_DLL_DECL PageHeap {
   Length ReleaseAtLeastNPages(Length num_pages);
 
   // Reads and writes to pagemap_cache_ do not require locking.
-  bool TryGetSizeClass(PageID p, size_t* out) const {
+  bool TryGetSizeClass(PageID p, uint32* out) const {
     return pagemap_cache_.TryGet(p, out);
   }
-  void SetCachedSizeClass(PageID p, size_t cl) {
+  void SetCachedSizeClass(PageID p, uint32 cl) {
     ASSERT(cl != 0);
     pagemap_cache_.Put(p, cl);
   }
   void InvalidateCachedSizeClass(PageID p) { pagemap_cache_.Invalidate(p); }
-  size_t GetSizeClassOrZero(PageID p) const {
-    size_t cached_value;
+  uint32 GetSizeClassOrZero(PageID p) const {
+    uint32 cached_value;
     if (!TryGetSizeClass(p, &cached_value)) {
       cached_value = 0;
     }
diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc
index 21b1e5d..9d718f1 100644
--- a/src/tcmalloc.cc
+++ b/src/tcmalloc.cc
@@ -421,7 +421,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
     out->printf("transfer cache, and central cache, by size class\n");
     out->printf("------------------------------------------------\n");
     uint64_t cumulative = 0;
-    for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
+    for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
       if (class_count[cl] > 0) {
         size_t cl_size = Static::sizemap()->ByteSizeForClass(cl);
         uint64_t class_bytes = class_count[cl] * cl_size;
@@ -810,7 +810,7 @@ class TCMallocImplementation : public MallocExtension {
     if ((p >> (kAddressBits - kPageShift)) > 0) {
       return kNotOwned;
     }
-    size_t cl;
+    uint32 cl;
     if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
       return kOwned;
     }
@@ -915,8 +915,8 @@ static uint32_t size_class_with_alignment(size_t size, size_t align) {
   if (align >= kPageSize) {
     return 0;
   }
-  size_t cl;
-  if (!Static::sizemap()->MaybeSizeClass(size, &cl)) {
+  uint32 cl;
+  if (!Static::sizemap()->GetSizeClass(size, &cl)) {
     return 0;
   }
   // Search through acceptable size classes looking for one with
@@ -942,7 +942,7 @@ static ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) {
   if (PREDICT_FALSE(!Static::IsInited())) ThreadCache::InitModule();
 
   size_t align = static_cast<size_t>(1ull << (flags & 0x3f));
-  size_t cl = size_class_with_alignment(size, align);
+  uint32 cl = size_class_with_alignment(size, align);
   if (cl) {
     return Static::sizemap()->ByteSizeForClass(cl);
   } else {
@@ -960,9 +960,9 @@ size_t tc_nallocx(size_t size, int flags) {
   if (PREDICT_FALSE(flags != 0)) {
     return nallocx_slow(size, flags);
   }
-  size_t cl;
+  uint32 cl;
   // size class 0 is only possible if malloc is not yet initialized
-  if (Static::sizemap()->MaybeSizeClass(size, &cl) && cl != 0) {
+  if (Static::sizemap()->GetSizeClass(size, &cl) && cl != 0) {
     return Static::sizemap()->ByteSizeForClass(cl);
   } else {
     return nallocx_slow(size, 0);
@@ -1043,7 +1043,7 @@ static TCMallocGuard module_enter_exit_hook;
 
 static inline bool CheckCachedSizeClass(void *ptr) {
   PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cached_value;
+  uint32 cached_value;
   if (!Static::pageheap()->TryGetSizeClass(p, &cached_value)) {
     return true;
   }
@@ -1270,12 +1270,12 @@ ATTRIBUTE_ALWAYS_INLINE inline void* do_malloc(size_t size) {
   // note: it will force initialization of malloc if necessary
   ThreadCache* cache = ThreadCache::GetCache();
 
-  size_t cl;
+  uint32 cl;
 
   ASSERT(Static::IsInited());
   ASSERT(cache != NULL);
 
-  if (PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size, &cl))) {
+  if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) {
     return do_malloc_pages(cache, size);
   }
 
@@ -1345,7 +1345,7 @@ void do_free_with_callback(void* ptr,
   ThreadCache* heap = ThreadCache::GetCacheIfPresent();
 
   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cl;
+  uint32 cl;
 
 #ifndef NO_TCMALLOC_SAMPLES
   // we only pass size hint when ptr is not page aligned. Which
@@ -1353,7 +1353,7 @@ void do_free_with_callback(void* ptr,
   ASSERT(!use_hint || size_hint < kPageSize);
 #endif
 
-  if (!use_hint || PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size_hint, &cl))) {
+  if (!use_hint || PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size_hint, &cl))) {
     // if we're in sized delete, but size is too large, no need to
     // probe size cache
     bool cache_hit = !use_hint && Static::pageheap()->TryGetSizeClass(p, &cl);
@@ -1407,7 +1407,7 @@ inline size_t GetSizeWithCallback(const void* ptr,
   if (ptr == NULL)
     return 0;
   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
-  size_t cl;
+  uint32 cl;
   if (Static::pageheap()->TryGetSizeClass(p, &cl)) {
     return Static::sizemap()->ByteSizeForClass(cl);
   }
@@ -1726,8 +1726,8 @@ static void * malloc_fast_path(size_t size) {
     return AllocateFull(size);
   }
 
-  size_t cl;
-  if (PREDICT_FALSE(!Static::sizemap()->MaybeSizeClass(size, &cl))) {
+  uint32 cl;
+  if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) {
     return AllocateFull(size);
   }
 
diff --git a/src/tests/packed-cache_test.cc b/src/tests/packed-cache_test.cc
index 4af5178..3984594 100644
--- a/src/tests/packed-cache_test.cc
+++ b/src/tests/packed-cache_test.cc
@@ -39,14 +39,14 @@ static const int kHashbits = PackedCache<20>::kHashbits;
 
 template <int kKeybits>
 static size_t MustGet(const PackedCache<kKeybits>& cache, uintptr_t key) {
-  size_t rv;
+  uint32 rv;
   CHECK(cache.TryGet(key, &rv));
   return rv;
 }
 
 template <int kKeybits>
 static size_t Has(const PackedCache<kKeybits>& cache, uintptr_t key) {
-  size_t dummy;
+  uint32 dummy;
   return cache.TryGet(key, &dummy);
 }
 
diff --git a/src/thread_cache.cc b/src/thread_cache.cc
index 80a7776..7208d35 100644
--- a/src/thread_cache.cc
+++ b/src/thread_cache.cc
@@ -94,7 +94,7 @@ void ThreadCache::Init(pthread_t tid) {
   prev_ = NULL;
   tid_ = tid;
   in_setspecific_ = false;
-  for (size_t cl = 0; cl < Static::num_size_classes(); ++cl) {
+  for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
     list_[cl].Init(Static::sizemap()->class_to_size(cl));
   }
 
@@ -105,7 +105,7 @@ void ThreadCache::Init(pthread_t tid) {
 
 void ThreadCache::Cleanup() {
   // Put unused memory back into central cache
-  for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
+  for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
     if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
@@ -114,7 +114,7 @@ void ThreadCache::Cleanup() {
 
 // Remove some objects of class "cl" from central cache and add to thread heap.
 // On success, return the first object for immediate use; otherwise return NULL.
-void* ThreadCache::FetchFromCentralCache(size_t cl, int32_t byte_size) {
+void* ThreadCache::FetchFromCentralCache(uint32 cl, int32_t byte_size) {
   FreeList* list = &list_[cl];
   ASSERT(list->empty());
   const int batch_size = Static::sizemap()->num_objects_to_move(cl);
@@ -151,7 +151,7 @@ void* ThreadCache::FetchFromCentralCache(size_t cl, int32_t byte_size) {
   return start;
 }
 
-void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
+void ThreadCache::ListTooLong(FreeList* list, uint32 cl) {
   size_left_ -= list->object_size();
 
   const int batch_size = Static::sizemap()->num_objects_to_move(cl);
@@ -182,7 +182,7 @@ void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
 }
 
 // Remove some objects of class "cl" from thread heap and add to central cache
-void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
+void ThreadCache::ReleaseToCentralCache(FreeList* src, uint32 cl, int N) {
   ASSERT(src == &list_[cl]);
   if (N > src->length()) N = src->length();
   size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);
diff --git a/src/thread_cache.h b/src/thread_cache.h
index f7e9e17..f2f4ecc 100644
--- a/src/thread_cache.h
+++ b/src/thread_cache.h
@@ -78,15 +78,15 @@ class ThreadCache {
   void Cleanup();
 
   // Accessors (mostly just for printing stats)
-  int freelist_length(size_t cl) const { return list_[cl].length(); }
+  int freelist_length(uint32 cl) const { return list_[cl].length(); }
 
   // Total byte size in cache
   size_t Size() const { return max_size_ - size_left_; }
 
   // Allocate an object of the given size and class. The size given
   // must be the same as the size of the class in the size map.
-  void* Allocate(size_t size, size_t cl);
-  void Deallocate(void* ptr, size_t size_class);
+  void* Allocate(size_t size, uint32 cl);
+  void Deallocate(void* ptr, uint32 size_class);
 
   void Scavenge();
 
@@ -244,16 +244,16 @@ class ThreadCache {
 
   // Gets and returns an object from the central cache, and, if possible,
   // also adds some objects of that size class to this thread cache.
-  void* FetchFromCentralCache(size_t cl, int32_t byte_size);
+  void* FetchFromCentralCache(uint32 cl, int32_t byte_size);
 
-  void ListTooLong(void* ptr, size_t cl);
+  void ListTooLong(void* ptr, uint32 cl);
 
   // Releases some number of items from src. Adjusts the list's max_length
   // to eventually converge on num_objects_to_move(cl).
-  void ListTooLong(FreeList* src, size_t cl);
+  void ListTooLong(FreeList* src, uint32 cl);
 
   // Releases N items from this thread cache.
-  void ReleaseToCentralCache(FreeList* src, size_t cl, int N);
+  void ReleaseToCentralCache(FreeList* src, uint32 cl, int N);
 
   void SetMaxSize(int32 new_max_size);
 
@@ -366,7 +366,7 @@ inline int ThreadCache::HeapsInUse() {
   return threadcache_allocator.inuse();
 }
 
-inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, size_t cl) {
+inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, uint32 cl) {
   FreeList* list = &list_[cl];
 
 #ifdef NO_TCMALLOC_SAMPLES
@@ -385,7 +385,7 @@ inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(size_t size, size_t c
   return rv;
 }
 
-inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, size_t cl) {
+inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32 cl) {
   ASSERT(list_[cl].max_length() > 0);
   FreeList* list = &list_[cl];
   // This catches back-to-back frees of allocs in the same size
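
Note (not part of the patch): below is a minimal standalone sketch of the two-tier index math behind the new ClassIndexMaybe()/GetSizeClass() pair, to show why a 32-bit index is always enough. The constants kMaxSmallSize = 1024 and kMaxSize = 256 * 1024 are assumed typical defaults rather than values taken from this diff, and the helper is a hypothetical reimplementation that uses plain uint32_t instead of the codebase's uint32 typedef. Requests up to 1024 bytes map to an index in 8-byte steps, larger requests up to kMaxSize map in 128-byte steps, and the 120 << 7 offset lines the two formulas up at the 1024-byte boundary (both yield index 128); anything larger returns false so the caller can take its slow path.

#include <cstddef>
#include <cstdint>
#include <cstdio>

static const size_t kMaxSmallSize = 1024;        // assumed default, not taken from this patch
static const size_t kMaxSize      = 256 * 1024;  // assumed default, not taken from this patch

// Hypothetical standalone mirror of the patched ClassIndexMaybe() logic.
static bool ClassIndexMaybe(size_t s, uint32_t* idx) {
  if (s <= kMaxSmallSize) {
    *idx = (static_cast<uint32_t>(s) + 7) >> 3;                 // 8-byte buckets
    return true;
  } else if (s <= kMaxSize) {
    *idx = (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;  // 128-byte buckets
    return true;
  }
  return false;  // too large for any size class
}

int main() {
  uint32_t idx;
  if (ClassIndexMaybe(1024, &idx))
    std::printf("1024 -> index %u\n", static_cast<unsigned>(idx));  // prints 128
  if (ClassIndexMaybe(1025, &idx))
    std::printf("1025 -> index %u\n", static_cast<unsigned>(idx));  // prints 129
  if (!ClassIndexMaybe(kMaxSize + 1, &idx))
    std::printf("too large for a size class; a real caller would take its slow path\n");
  return 0;
}

GetSizeClass() keeps the boolean contract of the old MaybeSizeClass() but narrows the output parameter from size_t* to uint32*, so callers such as do_malloc() and malloc_fast_path() still branch to their slow paths on false while a 32-bit class index flows through the thread cache, page heap, and pagemap cache.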