Increase alignment of av_malloc() as needed by AVX ASM.

Signed-off-by: Reinhard Tartler <siretart@tauware.de>
This commit is contained in:
Vitor Sessak 2011-04-23 19:24:31 +02:00 committed by Reinhard Tartler
parent 33cbfa6fa3
commit 13dfce3d44
1 changed file with 7 additions and 9 deletions

View File

@@ -69,21 +69,21 @@ void *av_malloc(size_t size)
#endif #endif
/* let's disallow possible ambiguous cases */ /* let's disallow possible ambiguous cases */
if(size > (INT_MAX-16) ) if(size > (INT_MAX-32) )
return NULL; return NULL;
#if CONFIG_MEMALIGN_HACK #if CONFIG_MEMALIGN_HACK
ptr = malloc(size+16); ptr = malloc(size+32);
if(!ptr) if(!ptr)
return ptr; return ptr;
diff= ((-(long)ptr - 1)&15) + 1; diff= ((-(long)ptr - 1)&31) + 1;
ptr = (char*)ptr + diff; ptr = (char*)ptr + diff;
((char*)ptr)[-1]= diff; ((char*)ptr)[-1]= diff;
#elif HAVE_POSIX_MEMALIGN #elif HAVE_POSIX_MEMALIGN
if (posix_memalign(&ptr,16,size)) if (posix_memalign(&ptr,32,size))
ptr = NULL; ptr = NULL;
#elif HAVE_MEMALIGN #elif HAVE_MEMALIGN
ptr = memalign(16,size); ptr = memalign(32,size);
/* Why 64? /* Why 64?
Indeed, we should align it: Indeed, we should align it:
on 4 for 386 on 4 for 386
@@ -93,10 +93,8 @@ void *av_malloc(size_t size)
Because L1 and L2 caches are aligned on those values. Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here! But I don't want to code such logic here!
*/ */
/* Why 16? /* Why 32?
Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs For AVX ASM. SSE / NEON needs only 16.
it will just trigger an exception and the unaligned load will be done in the
exception handler or it will just segfault (SSE2 on P4).
Why not larger? Because I did not see a difference in benchmarks ... Why not larger? Because I did not see a difference in benchmarks ...
*/ */
/* benchmarks with P3 /* benchmarks with P3