New configure flags to set the size and alignment of tcmalloc pages
Added two new configure flags, --with-tcmalloc-pagesize and --with-tcmalloc-alignment, to set the tcmalloc internal page size and alignment without needing a compiler directive, and to make the choice of page size independent of the alignment.
This commit is contained in:
parent
1ecc068be9
commit
0d9b42839b
20
INSTALL
20
INSTALL
|
@ -102,19 +102,19 @@ cost of using more space (due to internal fragmentation).
|
|||
Internally, tcmalloc divides its memory into "pages." The default
|
||||
page size is chosen to minimize memory use by reducing fragmentation.
|
||||
The cost is that keeping track of these pages can cost tcmalloc time.
|
||||
We've added a new, experimental flag to tcmalloc that enables a larger
|
||||
page size. In general, this will increase the memory needs of
|
||||
applications using tcmalloc. However, in many cases it will speed up
|
||||
the applications as well, particularly if they allocate and free a lot
|
||||
of memory. We've seen average speedups of 3-5% on Google
|
||||
applications.
|
||||
We've added a new flag to tcmalloc that enables a larger page size.
|
||||
In general, this will increase the memory needs of applications using
|
||||
tcmalloc. However, in many cases it will speed up the applications
|
||||
as well, particularly if they allocate and free a lot of memory. We've
|
||||
seen average speedups of 3-5% on Google applications.
|
||||
|
||||
This feature is still very experimental; it's not even a configure
|
||||
flag yet. To build libtcmalloc with large pages, run
|
||||
To build libtcmalloc with large pages you need to use the
|
||||
--with-tcmalloc-pagesize=ARG configure flag, e.g.:
|
||||
|
||||
./configure <normal flags> CXXFLAGS=-DTCMALLOC_LARGE_PAGES
|
||||
./configure <other flags> --with-tcmalloc-pagesize=32
|
||||
|
||||
(or add -DTCMALLOC_LARGE_PAGES to your existing CXXFLAGS argument).
|
||||
The ARG argument can be 8, 32 or 64, which sets the internal page size to
|
||||
8K, 32K and 64K respectively. The default is 8K.
|
||||
|
||||
|
||||
*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME
|
||||
|
|
37
configure.ac
37
configure.ac
|
@ -22,7 +22,6 @@ AM_INIT_AUTOMAKE([dist-zip])
|
|||
AC_CONFIG_HEADERS([src/config.h])
|
||||
|
||||
AM_MAINTAINER_MODE()
|
||||
|
||||
# Export the version information (for tc_version and friends)
|
||||
TC_VERSION_MAJOR=`expr "$PACKAGE_VERSION" : '\([[0-9]]*\)'`
|
||||
TC_VERSION_MINOR=`expr "$PACKAGE_VERSION" : '[[0-9]]*\.\([[0-9]]*\)'`
|
||||
|
@ -42,6 +41,8 @@ default_enable_heap_profiler=yes
|
|||
default_enable_heap_checker=yes
|
||||
default_enable_debugalloc=yes
|
||||
default_enable_minimal=no
|
||||
default_tcmalloc_pagesize=8
|
||||
default_tcmalloc_alignment=16
|
||||
need_nanosleep=yes # Used later, to decide if to run ACX_NANOSLEEP
|
||||
case "$host" in
|
||||
*-mingw*) default_enable_minimal=yes; default_enable_debugalloc=no;
|
||||
|
@ -95,6 +96,40 @@ AC_ARG_ENABLE([libunwind],
|
|||
[enable libunwind linking])],
|
||||
[],
|
||||
[enable_libunwind="$default_enable_libunwind"])
|
||||
AC_ARG_WITH([tcmalloc-pagesize],
|
||||
[AS_HELP_STRING([--with-tcmalloc-pagesize],
|
||||
[Set the tcmalloc internal page size to 8K, 32K or 64K])],
|
||||
[],
|
||||
[with_tcmalloc_pagesize=$default_tcmalloc_pagesize])
|
||||
AC_ARG_WITH([tcmalloc-alignment],
|
||||
[AS_HELP_STRING([--with-tcmalloc-alignment],
|
||||
[Set the tcmalloc internal page alignment to 8 or 16 bytes])],
|
||||
[],
|
||||
[with_tcmalloc_alignment=$default_tcmalloc_alignment])
|
||||
|
||||
case "$with_tcmalloc_pagesize" in
|
||||
8)
|
||||
#Default tcmalloc page size.
|
||||
;;
|
||||
32)
|
||||
AC_DEFINE(TCMALLOC_32K_PAGES, 1,
|
||||
[Define 32K of internal pages size for tcmalloc]);;
|
||||
64)
|
||||
AC_DEFINE(TCMALLOC_64K_PAGES, 1,
|
||||
[Define 64K of internal pages size for tcmalloc]);;
|
||||
*)
|
||||
AC_MSG_WARN([${with_tcmalloc_pagesize}K size not supported, using default tcmalloc page size.])
|
||||
esac
|
||||
case "$with_tcmalloc_alignment" in
|
||||
8)
|
||||
AC_DEFINE(TCMALLOC_ALIGN_8BYTES, 1,
|
||||
[Define 8 bytes of internal pages alignment for tcmalloc]);;
|
||||
16)
|
||||
#Default tcmalloc page alignment.
|
||||
;;
|
||||
*)
|
||||
AC_MSG_WARN([${with_tcmalloc_alignment} bytes not supported, using default tcmalloc page alignment.])
|
||||
esac
|
||||
|
||||
# Checks for programs.
|
||||
AC_PROG_CXX
|
||||
|
|
36
src/common.h
36
src/common.h
|
@ -62,6 +62,19 @@ typedef uintptr_t Length;
|
|||
// Configuration
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
#if defined(TCMALLOC_ALIGN_8BYTES)
|
||||
// Unless we force to use 8 bytes alignment we use an alignment of
|
||||
// at least 16 bytes to satisfy requirements for some SSE types.
|
||||
// Keep in mind when using the 16 bytes alignment you can have a space
|
||||
// waste due to alignment of 25%. (e.g. a malloc of 24 bytes will get 32 bytes)
|
||||
static const size_t kMinAlign = 8;
|
||||
// Number of classes created until reaching page size 128.
|
||||
static const size_t kBaseClasses = 16;
|
||||
#else
|
||||
static const size_t kMinAlign = 16;
|
||||
static const size_t kBaseClasses = 9;
|
||||
#endif
|
||||
|
||||
// Using large pages speeds up the execution at a cost of larger memory use.
|
||||
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
|
||||
// lookups in the page map result in fewer L2 cache misses, which translates to
|
||||
|
@ -70,28 +83,17 @@ typedef uintptr_t Length;
|
|||
// the thread cache allowance to avoid passing more free ranges to and from
|
||||
// central lists. Also, larger pages are less likely to get freed.
|
||||
// These two factors cause a bounded increase in memory use.
|
||||
|
||||
#if defined(TCMALLOC_LARGE_PAGES)
|
||||
#if defined(TCMALLOC_32K_PAGES)
|
||||
static const size_t kPageShift = 15;
|
||||
static const size_t kNumClasses = 78;
|
||||
static const size_t kMinAlign = 16;
|
||||
#elif defined(TCMALLOC_LARGE_PAGES64K)
|
||||
static const size_t kNumClasses = kBaseClasses + 69;
|
||||
#elif defined(TCMALLOC_64K_PAGES)
|
||||
static const size_t kPageShift = 16;
|
||||
static const size_t kNumClasses = 82;
|
||||
static const size_t kMinAlign = 16;
|
||||
#elif defined(TCMALLOC_ALIGN_8BYTES)
|
||||
static const size_t kPageShift = 13;
|
||||
static const size_t kNumClasses = 95;
|
||||
// Unless we force to use 8 bytes alignment we use an alignment of
|
||||
// at least 16 bytes to satisfy requirements for some SSE types.
|
||||
// Keep in mind when using the 16 bytes alignment you can have a space
|
||||
// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
|
||||
static const size_t kMinAlign = 8;
|
||||
static const size_t kNumClasses = kBaseClasses + 73;
|
||||
#else
|
||||
static const size_t kPageShift = 13;
|
||||
static const size_t kNumClasses = 88;
|
||||
static const size_t kMinAlign = 16;
|
||||
static const size_t kNumClasses = kBaseClasses + 79;
|
||||
#endif
|
||||
|
||||
static const size_t kMaxThreadCacheSize = 4 << 20;
|
||||
|
||||
static const size_t kPageSize = 1 << kPageShift;
|
||||
|
|
Loading…
Reference in New Issue