central_freelist: change fetch ordering

When we fetch objects from a span for the thread cache, we build a list in
reverse order relative to the span's original list and supply this list
to the thread cache. This algorithm has a problem with newly created spans.
A newly created span has its object list in ascending order. Since the thread cache
receives that list reversed, the user gets objects in descending order.

Following example shows what occurs in this algorithm.

new span: object list: 1 -> 2 -> 3 -> 4 -> 5 -> ...
fetch N items: N -> N-1 -> N-2 -> ... -> 2 -> 1 -> NULL
thread cache: N -> N-1 -> N-2 -> ... -> 2 -> 1 -> NULL

user's 1st malloc: N
user's 2nd malloc: N-1
...
user's Nth malloc: 1

In general, accessing memory in ascending order is better than descending
order in terms of performance, so this patch fixes this situation.

I ran the program below to measure the performance effect.

	#define MALLOC_SIZE (512)
	#define CACHE_SIZE (64)
	#define TOUCH_SIZE (512 / CACHE_SIZE)

	array = malloc(sizeof(void *) * count);

	for (i = 0; i < 1; i++) {
		for (j = 0; j < count; j++) {
			x = malloc(MALLOC_SIZE);
			array[j] = x;
		}
	}

	repeat = 10;
	for (i = 0; i < repeat; i++) {
		for (j = 0; j < count; j++) {
			x = array[j];
			for (k = 0; k < TOUCH_SIZE; k++) {
				*(x + (k * CACHE_SIZE)) = '1';
			}
		}
	}

LD_PRELOAD=libtcmalloc_minimal.so perf stat -r 10 ./a.out 1000000

**** Before ****
 Performance counter stats for './a.out 1000000' (10 runs):

       2.715161299 seconds time elapsed                                          ( +-  0.07% )

**** After ****
 Performance counter stats for './a.out 1000000' (10 runs):

       2.259366428 seconds time elapsed                                          ( +-  0.08% )
This commit is contained in:
Joonsoo Kim 2013-10-10 14:26:47 +09:00 committed by Aliaksey Kandratsenka
parent 7315b45c28
commit 7be35fb0d8

View File

@ -294,25 +294,26 @@ int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) {
ASSERT(span->objects != NULL);
int result = 0;
*end = span->objects;
while (result < N) {
void *t;
void *prev, *curr;
curr = span->objects;
do {
prev = curr;
curr = *(reinterpret_cast<void**>(curr));
} while (++result < N && curr != NULL);
t = span->objects;
span->objects = *(reinterpret_cast<void**>(t));
SLL_Push(start, t);
result++;
if (span->objects == NULL) {
// Move to empty list
tcmalloc::DLL_Remove(span);
tcmalloc::DLL_Prepend(&empty_, span);
Event(span, 'E', 0);
break;
}
if (curr == NULL) {
// Move to empty list
tcmalloc::DLL_Remove(span);
tcmalloc::DLL_Prepend(&empty_, span);
Event(span, 'E', 0);
}
*start = span->objects;
*end = prev;
span->objects = curr;
SLL_SetNext(*end, NULL);
span->refcount += result;
counter_ -= result;
SLL_SetNext(*end, NULL);
return result;
}