New configure flags to set the alignment and page size of tcmalloc

Added two new configure flags, --with-tcmalloc-pagesize and
--with-tcmalloc-alignment, in order to set the tcmalloc internal page
size and tcmalloc allocation alignment without the need for a compiler
directive and to make the choice of the page size independent of the
allocation alignment.
This commit is contained in:
Raphael Moreira Zinsly 2014-12-23 10:29:49 -02:00 committed by Aliaksey Kandratsenka
parent 1035d5c18f
commit 3f55d874be
3 changed files with 65 additions and 28 deletions

20
INSTALL
View File

@ -102,19 +102,19 @@ cost of using more space (due to internal fragmentation).
Internally, tcmalloc divides its memory into "pages." The default
page size is chosen to minimize memory use by reducing fragmentation.
The cost is that keeping track of these pages can cost tcmalloc time.
We've added a new, experimental flag to tcmalloc that enables a larger
page size. In general, this will increase the memory needs of
applications using tcmalloc. However, in many cases it will speed up
the applications as well, particularly if they allocate and free a lot
of memory. We've seen average speedups of 3-5% on Google
applications.
We've added a new flag to tcmalloc that enables a larger page size.
In general, this will increase the memory needs of applications using
tcmalloc. However, in many cases it will speed up the applications
as well, particularly if they allocate and free a lot of memory. We've
seen average speedups of 3-5% on Google applications.
This feature is still very experimental; it's not even a configure
flag yet. To build libtcmalloc with large pages, run
To build libtcmalloc with large pages you need to use the
--with-tcmalloc-pagesize=ARG configure flag, e.g.:
./configure <normal flags> CXXFLAGS=-DTCMALLOC_LARGE_PAGES
./configure <other flags> --with-tcmalloc-pagesize=32
(or add -DTCMALLOC_LARGE_PAGES to your existing CXXFLAGS argument).
The ARG argument can be 8, 32 or 64, which sets the internal page size to
8K, 32K or 64K respectively. The default is 8K.
*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME

View File

@ -22,7 +22,6 @@ AM_INIT_AUTOMAKE([dist-zip])
AC_CONFIG_HEADERS([src/config.h])
AM_MAINTAINER_MODE()
# Export the version information (for tc_version and friends)
TC_VERSION_MAJOR=`expr "$PACKAGE_VERSION" : '\([[0-9]]*\)'`
TC_VERSION_MINOR=`expr "$PACKAGE_VERSION" : '[[0-9]]*\.\([[0-9]]*\)'`
@ -42,6 +41,8 @@ default_enable_heap_profiler=yes
default_enable_heap_checker=yes
default_enable_debugalloc=yes
default_enable_minimal=no
default_tcmalloc_pagesize=8
default_tcmalloc_alignment=16
need_nanosleep=yes # Used later, to decide if to run ACX_NANOSLEEP
case "$host" in
*-mingw*) default_enable_minimal=yes; default_enable_debugalloc=no;
@ -95,6 +96,40 @@ AC_ARG_ENABLE([libunwind],
[enable libunwind linking])],
[],
[enable_libunwind="$default_enable_libunwind"])
AC_ARG_WITH([tcmalloc-pagesize],
[AS_HELP_STRING([--with-tcmalloc-pagesize],
[Set the tcmalloc internal page size to 8K, 32K or 64K])],
[],
[with_tcmalloc_pagesize=$default_tcmalloc_pagesize])
AC_ARG_WITH([tcmalloc-alignment],
[AS_HELP_STRING([--with-tcmalloc-alignment],
[Set the tcmalloc allocation alignment to 8 or 16 bytes])],
[],
[with_tcmalloc_alignment=$default_tcmalloc_alignment])
case "$with_tcmalloc_pagesize" in
8)
#Default tcmalloc page size.
;;
32)
AC_DEFINE(TCMALLOC_32K_PAGES, 1,
[Define 32K of internal pages size for tcmalloc]);;
64)
AC_DEFINE(TCMALLOC_64K_PAGES, 1,
[Define 64K of internal pages size for tcmalloc]);;
*)
AC_MSG_WARN([${with_tcmalloc_pagesize}K size not supported, using default tcmalloc page size.])
esac
case "$with_tcmalloc_alignment" in
8)
AC_DEFINE(TCMALLOC_ALIGN_8BYTES, 1,
[Define 8 bytes of allocation alignment for tcmalloc]);;
16)
#Default tcmalloc allocation alignment.
;;
*)
AC_MSG_WARN([${with_tcmalloc_alignment} bytes not supported, using default tcmalloc allocation alignment.])
esac
# Checks for programs.
AC_PROG_CXX

View File

@ -62,6 +62,19 @@ typedef uintptr_t Length;
// Configuration
//-------------------------------------------------------------------
#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we force the use of 8-byte alignment, we use an alignment of
// at least 16 bytes to satisfy the requirements of some SSE types.
// Keep in mind that with 16-byte alignment up to 25% of the space can be
// wasted due to alignment (e.g. a malloc of 24 bytes will get 32 bytes).
static const size_t kMinAlign = 8;
// Number of classes created until reaching page size 128.
static const size_t kBaseClasses = 16;
#else
static const size_t kMinAlign = 16;
static const size_t kBaseClasses = 9;
#endif
// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
@ -70,28 +83,17 @@ typedef uintptr_t Length;
// the thread cache allowance to avoid passing more free ranges to and from
// central lists. Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_LARGE_PAGES)
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
static const size_t kNumClasses = 78;
static const size_t kMinAlign = 16;
#elif defined(TCMALLOC_LARGE_PAGES64K)
static const size_t kNumClasses = kBaseClasses + 69;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
static const size_t kNumClasses = 82;
static const size_t kMinAlign = 16;
#elif defined(TCMALLOC_ALIGN_8BYTES)
static const size_t kPageShift = 13;
static const size_t kNumClasses = 95;
// Unless we force to use 8 bytes alignment we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind when using the 16 bytes alignment you can have a space
// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
static const size_t kMinAlign = 8;
static const size_t kNumClasses = kBaseClasses + 73;
#else
static const size_t kPageShift = 13;
static const size_t kNumClasses = 88;
static const size_t kMinAlign = 16;
static const size_t kNumClasses = kBaseClasses + 79;
#endif
static const size_t kMaxThreadCacheSize = 4 << 20;
static const size_t kPageSize = 1 << kPageShift;