cosmetics in imdct_sse

Originally committed as revision 24958 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Loren Merritt 2010-08-28 21:03:13 +00:00
parent 2d0cdf3cc0
commit 19d929f9a3
1 changed file with 20 additions and 25 deletions

View File

@ -532,20 +532,15 @@ INIT_XMM
unpckhps xmm0, xmm2 unpckhps xmm0, xmm2
%endmacro %endmacro
%macro PREROTATEW 3 ;addr1, addr2, xmm
movlps %1, %3
movhps %2, %3
%endmacro
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
movaps xmm6, [%4+%1*2] movaps xmm6, [%4+%1*2]
movaps %2, [%4+%1*2+0x10] movaps %2, [%4+%1*2+0x10]
movaps %3, xmm6 movaps %3, xmm6
movaps xmm7, %2 movaps xmm7, %2
mulps xmm6, [%5+%1*1] mulps xmm6, [%5+%1]
mulps %2, [%6+%1*1] mulps %2, [%6+%1]
mulps %3, [%6+%1*1] mulps %3, [%6+%1]
mulps xmm7, [%5+%1*1] mulps xmm7, [%5+%1]
subps %2, xmm6 subps %2, xmm6
addps %3, xmm7 addps %3, xmm7
%endmacro %endmacro
@ -576,8 +571,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
%define rrevtab r10 %define rrevtab r10
%define rtcos r11 %define rtcos r11
%define rtsin r12 %define rtsin r12
push r10
push r11
push r12 push r12
push r13 push r13
push r14 push r14
@ -620,21 +613,25 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
PREROTATER r4, r3, r2, rtcos, rtsin PREROTATER r4, r3, r2, rtcos, rtsin
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
movzx r5, word [rrevtab+r4*1-4] movzx r5, word [rrevtab+r4-4]
movzx r6, word [rrevtab+r4*1-2] movzx r6, word [rrevtab+r4-2]
movzx r13, word [rrevtab+r3*1] movzx r13, word [rrevtab+r3]
movzx r14, word [rrevtab+r3*1+2] movzx r14, word [rrevtab+r3+2]
PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0 movlps [r1+r5 *8], xmm0
PREROTATEW [r1+r13*8], [r1+r14*8], xmm1 movhps [r1+r6 *8], xmm0
movlps [r1+r13*8], xmm1
movhps [r1+r14*8], xmm1
add r4, 4 add r4, 4
%else %else
mov r6, [esp] mov r6, [esp]
movzx r5, word [r6+r4*1-4] movzx r5, word [r6+r4-4]
movzx r4, word [r6+r4*1-2] movzx r4, word [r6+r4-2]
PREROTATEW [r1+r5*8], [r1+r4*8], xmm0 movlps [r1+r5*8], xmm0
movzx r5, word [r6+r3*1] movhps [r1+r4*8], xmm0
movzx r4, word [r6+r3*1+2] movzx r5, word [r6+r3]
PREROTATEW [r1+r5*8], [r1+r4*8], xmm1 movzx r4, word [r6+r3+2]
movlps [r1+r5*8], xmm1
movhps [r1+r4*8], xmm1
%endif %endif
sub r3, 4 sub r3, 4
jns .pre jns .pre
@ -663,8 +660,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
pop r14 pop r14
pop r13 pop r13
pop r12 pop r12
pop r11
pop r10
%else %else
add esp, 12 add esp, 12
%endif %endif