git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3553 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
michael 2001-12-17 02:26:30 +00:00
parent 64f97cd0a1
commit 1c8fe49230
1 changed files with 65 additions and 0 deletions

View File

@ -87,6 +87,7 @@ static float __attribute__((aligned(16))) sseW5[128];
static float __attribute__((aligned(16))) sseW6[256];
/* Table of the sseW1..sseW6 arrays, indexed by their number; slot 0 is NULL
 * because there is no sseW0 array. */
static float __attribute__((aligned(16))) *sseW[7]=
{NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
/* Sign-adjusted copy of imdct_window used by the SSE windowing code:
 * imdct_init() fills it with imdct_window where every even-indexed entry is
 * negated. It is used directly as a memory operand of mulps, hence the
 * 16-byte alignment. */
static float __attribute__((aligned(16))) sseWindow[256];
#else
static complex_t buf[128];
#endif
@ -488,15 +489,72 @@ imdct_do_512(sample_t data[],sample_t delay[], sample_t bias)
window_ptr = imdct_window;
/* Window and convert to real valued signal */
#ifdef HAVE_SSE
    /*
     * SSE version of the scalar loop in the #else branch: produces the first
     * 128 output samples, four floats per iteration (32 iterations).
     *
     *   data[2i]   = -buf[64+i].imag   * window[2i]   + delay[2i]   + bias
     *   data[2i+1] =  buf[64-i-1].real * window[2i+1] + delay[2i+1] + bias
     *
     * The minus sign of the even taps is not applied here; it is already
     * folded into sseWindow (even entries are stored negated).
     *
     * Register use:
     *   %0   = buf+64, %1 = data_ptr, %2 = delay_ptr, %3 = bias (memory)
     *   esi  = forward byte offset, 0 .. 496 in steps of 16
     *   edi  = backward byte offset, 0 .. -496 in steps of 16
     *   xmm2 = bias replicated into all four lanes
     *
     * Lane comments below list lanes from highest to lowest.
     * Assumes complex_t is { float real; float imag; } (8 bytes) - TODO confirm.
     * movaps and the memory operands of mulps/addps require 16-byte aligned
     * addresses; sseWindow is declared aligned(16), the alignment of data and
     * delay has to be guaranteed by the caller - TODO confirm.
     *
     * The asm reads buf/delay/sseWindow and stores to data through pointers
     * that are only passed as "r" inputs, so the "memory" clobber is needed to
     * stop the compiler from caching or reordering memory accesses around it.
     * NOTE(review): xmm0-xmm2 are modified but not listed as clobbers; add
     * them once it is confirmed that all supported compilers accept xmm
     * register names in the clobber list.
     */
    asm volatile(
        "xorl %%edi, %%edi \n\t" // backward offset = 0
        "xorl %%esi, %%esi \n\t" // forward offset = 0
        "movss %3, %%xmm2 \n\t" // bias
        "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
        ".balign 16 \n\t"
        "1: \n\t"
        "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ?   (A = imag of forward entry)
        "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ?   (C = imag of next forward entry)
        "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ?   (D = real, two entries back)
        "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ?   (B = real, one entry back)
        "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
        "mulps sseWindow(%%esi), %%xmm0 \n\t" // apply sign-adjusted window
        "addps (%2, %%esi), %%xmm0 \n\t" // + delay
        "addps %%xmm2, %%xmm0 \n\t" // + bias
        "movaps %%xmm0, (%1, %%esi) \n\t" // store four output samples
        "addl $16, %%esi \n\t"
        "subl $16, %%edi \n\t"
        "cmpl $512, %%esi \n\t" // 128 floats * 4 bytes
        " jb 1b \n\t"
        :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
        : "%esi", "%edi", "memory"
    );
    /* Advance the pointers by the 128 samples handled above, exactly as the
     * post-increments of the scalar loop would have done. */
    data_ptr+=128;
    delay_ptr+=128;
    window_ptr+=128;
#else
    /* Scalar reference: 64 iterations, two output samples each. */
    for(i=0; i< 64; i++) {
        *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
        *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
    }
#endif
#ifdef HAVE_SSE
    /*
     * SSE version of the scalar loop in the #else branch: produces the next
     * 128 output samples, four floats per iteration (32 iterations).
     *
     *   data[2i]   = -buf[i].real       * window[2i]   + delay[2i]   + bias
     *   data[2i+1] =  buf[128-i-1].imag * window[2i+1] + delay[2i+1] + bias
     *
     * The minus sign of the even taps is not applied here; it is already
     * folded into sseWindow (even entries are stored negated). The window is
     * read at byte offset 512, i.e. starting at sseWindow[128].
     *
     * Register use:
     *   %0   = buf, %1 = data_ptr, %2 = delay_ptr, %3 = bias (memory)
     *   esi  = forward byte offset, 0 .. 496 in steps of 16
     *   edi  = backward byte offset, 1024 .. 528 in steps of 16
     *   xmm2 = bias replicated into all four lanes
     *
     * Lane comments below list lanes from highest to lowest.
     * Assumes complex_t is { float real; float imag; } (8 bytes), so that
     * byte offset 1024 is just past buf[127] - TODO confirm.
     * movaps and the memory operands of mulps/addps require 16-byte aligned
     * addresses; sseWindow is declared aligned(16), the alignment of data and
     * delay has to be guaranteed by the caller - TODO confirm.
     *
     * The asm reads buf/delay/sseWindow and stores to data through pointers
     * that are only passed as "r" inputs, so the "memory" clobber is needed to
     * stop the compiler from caching or reordering memory accesses around it.
     * NOTE(review): xmm0-xmm2 are modified but not listed as clobbers; add
     * them once it is confirmed that all supported compilers accept xmm
     * register names in the clobber list.
     */
    asm volatile(
        "movl $1024, %%edi \n\t" // 1024 = byte offset just past the last buf entry
        "xorl %%esi, %%esi \n\t" // forward offset = 0
        "movss %3, %%xmm2 \n\t" // bias
        "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
        ".balign 16 \n\t"
        "1: \n\t"
        "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A   (A = real of forward entry)
        "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C   (C = real of next forward entry)
        "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C   (D = imag, two entries back)
        "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A   (B = imag, one entry back)
        "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
        "mulps 512+sseWindow(%%esi), %%xmm0 \n\t" // apply sign-adjusted window
        "addps (%2, %%esi), %%xmm0 \n\t" // + delay
        "addps %%xmm2, %%xmm0 \n\t" // + bias
        "movaps %%xmm0, (%1, %%esi) \n\t" // store four output samples
        "addl $16, %%esi \n\t"
        "subl $16, %%edi \n\t"
        "cmpl $512, %%esi \n\t" // 128 floats * 4 bytes
        " jb 1b \n\t"
        :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
        : "%esi", "%edi", "memory"
    );
    /* Advance by the 128 samples handled above. delay_ptr is not advanced
     * here, unlike in the scalar branch; the statement following this block
     * reassigns it. */
    data_ptr+=128;
    window_ptr+=128;
#else
    /* Scalar reference: 64 iterations, two output samples each. */
    for(i=0; i< 64; i++) {
        *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
        *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
    }
#endif
/* The trailing edge of the window goes into the delay line */
delay_ptr = delay;
@ -710,6 +768,13 @@ void imdct_init (uint32_t mm_accel)
}
}
}
/* Build the window table used by the SSE windowing code: walk imdct_window
 * in pairs, storing the even-indexed tap negated and the odd-indexed tap
 * unchanged, so the SSE code can use a plain multiply for both. */
for (i = 0; i < 128; i++) {
    sseWindow[2 * i]     = -imdct_window[2 * i];
    sseWindow[2 * i + 1] =  imdct_window[2 * i + 1];
}
#endif
imdct_512 = imdct_do_512;