mirror of https://github.com/mpv-player/mpv
libac3 is now fully 3DNow! optimized
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@921 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
5e88953e0c
commit
d29c15dbdf
|
@ -32,19 +32,19 @@
|
|||
#ifndef SRFFTP_H__
|
||||
#define SRFFTP_H__
|
||||
|
||||
static complex_t delta16[4] =
|
||||
static complex_t delta16[4] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.92387953251129, -0.38268343236509},
|
||||
{0.70710678118655, -0.70710678118655},
|
||||
{0.38268343236509, -0.92387953251129}};
|
||||
|
||||
static complex_t delta16_3[4] =
|
||||
static complex_t delta16_3[4] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.38268343236509, -0.92387953251129},
|
||||
{-0.70710678118655, -0.70710678118655},
|
||||
{-0.92387953251129, 0.38268343236509}};
|
||||
|
||||
static complex_t delta32[8] =
|
||||
static complex_t delta32[8] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.98078528040323, -0.19509032201613},
|
||||
{0.92387953251129, -0.38268343236509},
|
||||
|
@ -54,7 +54,7 @@ static complex_t delta32[8] =
|
|||
{0.38268343236509, -0.92387953251129},
|
||||
{0.19509032201613, -0.98078528040323}};
|
||||
|
||||
static complex_t delta32_3[8] =
|
||||
static complex_t delta32_3[8] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.83146961230255, -0.55557023301960},
|
||||
{0.38268343236509, -0.92387953251129},
|
||||
|
@ -64,7 +64,7 @@ static complex_t delta32_3[8] =
|
|||
{-0.92387953251129, 0.38268343236509},
|
||||
{-0.55557023301960, 0.83146961230255}};
|
||||
|
||||
static complex_t delta64[16] =
|
||||
static complex_t delta64[16] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.99518472667220, -0.09801714032956},
|
||||
{0.98078528040323, -0.19509032201613},
|
||||
|
@ -82,7 +82,7 @@ static complex_t delta64[16] =
|
|||
{0.19509032201613, -0.98078528040323},
|
||||
{0.09801714032956, -0.99518472667220}};
|
||||
|
||||
static complex_t delta64_3[16] =
|
||||
static complex_t delta64_3[16] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.95694033573221, -0.29028467725446},
|
||||
{0.83146961230255, -0.55557023301960},
|
||||
|
@ -100,7 +100,7 @@ static complex_t delta64_3[16] =
|
|||
{-0.55557023301960, 0.83146961230255},
|
||||
{-0.29028467725446, 0.95694033573221}};
|
||||
|
||||
static complex_t delta128[32] =
|
||||
static complex_t delta128[32] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.99879545620517, -0.04906767432742},
|
||||
{0.99518472667220, -0.09801714032956},
|
||||
|
@ -134,7 +134,7 @@ static complex_t delta128[32] =
|
|||
{0.09801714032956, -0.99518472667220},
|
||||
{0.04906767432742, -0.99879545620517}};
|
||||
|
||||
static complex_t delta128_3[32] =
|
||||
static complex_t delta128_3[32] __attribute__((aligned(16))) =
|
||||
{ {1.00000000000000, 0.00000000000000},
|
||||
{0.98917650996478, -0.14673047445536},
|
||||
{0.95694033573221, -0.29028467725446},
|
||||
|
|
|
@ -68,11 +68,18 @@ static complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188,
|
|||
"psrlq $32, "##mm_base"\n\t"\
|
||||
"punpckldq "##mm_hlp","##mm_base"\n\t"
|
||||
#endif
|
||||
#ifdef HAVE_3DNOWEX
|
||||
#define PFNACC_MM(mm_base,mm_hlp) "pfnacc "##mm_base","##mm_base"\n\t"
|
||||
#else
|
||||
#define PFNACC_MM(mm_base,mm_hlp)\
|
||||
"movq "##mm_base","##mm_hlp"\n\t"\
|
||||
"psrlq $32,"##mm_hlp"\n\t"\
|
||||
"punpckldq "##mm_hlp","##mm_hlp"\n\t"\
|
||||
"pfsub "##mm_hlp","##mm_base"\n\t"
|
||||
#endif
|
||||
|
||||
#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
|
||||
{ \
|
||||
__asm__ __volatile__("femms":::"memory");\
|
||||
TRANS_FILL_MM6_MM7_3DNOW()\
|
||||
__asm__ __volatile__(\
|
||||
"movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
|
||||
"movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
|
||||
|
@ -98,13 +105,10 @@ static complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188,
|
|||
:"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
|
||||
:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
|
||||
:"memory");\
|
||||
__asm__ __volatile__("femms":::"memory");\
|
||||
}
|
||||
|
||||
#define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
|
||||
{\
|
||||
__asm__ __volatile__("femms":::"memory");\
|
||||
TRANS_FILL_MM6_MM7_3DNOW()\
|
||||
__asm__ __volatile__(\
|
||||
"movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
|
||||
"movq %%mm0, %%mm1\n\t"\
|
||||
|
@ -142,7 +146,56 @@ static complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188,
|
|||
:"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
|
||||
:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
|
||||
:"memory");\
|
||||
__asm__ __volatile__("femms":::"memory");\
|
||||
}
|
||||
|
||||
#define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
|
||||
{ \
|
||||
__asm__ __volatile__(\
|
||||
"movq %1, %%mm4\n\t"\
|
||||
"movq %%mm4, %%mm5\n\t"\
|
||||
"punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
|
||||
"punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
|
||||
"movq %0, %%mm0\n\t"\
|
||||
"pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
|
||||
"pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
|
||||
PSWAP_MM("%%mm5","%%mm3")\
|
||||
"pfmul %%mm7, %%mm5\n\t"\
|
||||
"pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
|
||||
"movq %3, %%mm1\n\t"\
|
||||
"movq %2, %%mm0\n\t"\
|
||||
PSWAP_MM("%%mm1","%%mm3")\
|
||||
"movq %%mm0, %%mm2\n\t"\
|
||||
"pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
|
||||
"pfmul %3, %%mm2\n\t"/* mm2 = v*/\
|
||||
PFNACC_MM("%%mm2","%%mm3")\
|
||||
"pfacc %%mm0, %%mm0\n\t"\
|
||||
"punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
|
||||
"movq %%mm2, %%mm3\n\t"\
|
||||
"pfmul %%mm7, %%mm3\n\t"\
|
||||
"movq %%mm4, %%mm5\n\t"\
|
||||
"pfmul %%mm6, %%mm5\n\t"\
|
||||
"pfadd %%mm3, %%mm5\n\t"\
|
||||
PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
|
||||
"pfadd %%mm2, %%mm4\n\t"\
|
||||
:\
|
||||
:"m"(WT), "m"(D), "m"(WB), "m"(D3)\
|
||||
:"memory");\
|
||||
__asm__ __volatile__(\
|
||||
"movq %4, %%mm0\n\t"/* a1 = A1*/\
|
||||
"movq %%mm0, %%mm1\n\t"\
|
||||
"pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
|
||||
"pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
|
||||
"movq %%mm0, %0\n\t"\
|
||||
"movq %%mm1, %1\n\t"\
|
||||
"movq %5, %%mm2\n\t"/* a1 = A5*/\
|
||||
"movq %%mm2, %%mm3\n\t"\
|
||||
"pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
|
||||
"pfadd %%mm5, %%mm3\n\t"/*A13 = a1 + v*/\
|
||||
"movq %%mm2, %2\n\t"\
|
||||
"movq %%mm3, %3"\
|
||||
:"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
|
||||
:"0"(A1), "2"(A5), "m"(u), "m"(v)\
|
||||
:"memory");\
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue