x86: h264_intrapred: use newly introduced SPLAT* and PSHUFLW macros

This commit is contained in:
Diego Biurrun 2012-07-04 15:32:16 +02:00
parent 4d4752366f
commit 878e669029

View File

@ -103,15 +103,8 @@ cglobal pred16x16_horizontal, 2,3
%else %else
punpcklbw m0, m0 punpcklbw m0, m0
punpcklbw m1, m1 punpcklbw m1, m1
%if cpuflag(mmx2) SPLATW m0, m0, 3
pshufw m0, m0, 0xff SPLATW m1, m1, 3
pshufw m1, m1, 0xff
%else
punpckhwd m0, m0
punpckhwd m1, m1
punpckhdq m0, m0
punpckhdq m1, m1
%endif
mova [r0+r1*0+8], m0 mova [r0+r1*0+8], m0
mova [r0+r1*1+8], m1 mova [r0+r1*1+8], m1
%endif %endif
@ -162,18 +155,8 @@ cglobal pred16x16_dc, 2,7
shr r2d, 5 shr r2d, 5
%if cpuflag(ssse3) %if cpuflag(ssse3)
pxor m1, m1 pxor m1, m1
movd m0, r2d
pshufb m0, m1
%elif cpuflag(sse2)
movd m0, r2d
punpcklbw m0, m0
pshuflw m0, m0, 0
punpcklqdq m0, m0
%elif cpuflag(mmx2)
movd m0, r2d
punpcklbw m0, m0
pshufw m0, m0, 0
%endif %endif
SPLATB_REG m0, r2d, m1
%if mmsize==8 %if mmsize==8
mov r3d, 8 mov r3d, 8
@ -227,12 +210,7 @@ cglobal pred16x16_tm_vp8, 2,5
movzx r2d, byte [r0+r1-1] movzx r2d, byte [r0+r1-1]
sub r2d, r3d sub r2d, r3d
movd mm4, r2d movd mm4, r2d
%if cpuflag(mmx2) SPLATW mm4, mm4, 0
pshufw mm4, mm4, 0
%else
punpcklwd mm4, mm4
punpckldq mm4, mm4
%endif
movq mm5, mm4 movq mm5, mm4
movq mm6, mm4 movq mm6, mm4
movq mm7, mm4 movq mm7, mm4
@ -332,19 +310,15 @@ cglobal pred16x16_plane_%1, 2,9,7
movhlps m1, m0 movhlps m1, m0
%endif %endif
paddw m0, m1 paddw m0, m1
%if cpuflag(sse2) %if cpuflag(mmx2)
pshuflw m1, m0, 0xE PSHUFLW m1, m0, 0xE
%elif cpuflag(mmx2)
pshufw m1, m0, 0xE
%elif cpuflag(mmx) %elif cpuflag(mmx)
mova m1, m0 mova m1, m0
psrlq m1, 32 psrlq m1, 32
%endif %endif
paddw m0, m1 paddw m0, m1
%if cpuflag(sse2) %if cpuflag(mmx2)
pshuflw m1, m0, 0x1 PSHUFLW m1, m0, 0x1
%elif cpuflag(mmx2)
pshufw m1, m0, 0x1
%elif cpuflag(mmx) %elif cpuflag(mmx)
mova m1, m0 mova m1, m0
psrlq m1, 16 psrlq m1, 16
@ -483,25 +457,9 @@ cglobal pred16x16_plane_%1, 2,9,7
movd m1, r5d movd m1, r5d
movd m3, r3d movd m3, r3d
%if cpuflag(sse2) SPLATW m0, m0, 0 ; H
pshuflw m0, m0, 0x0 SPLATW m1, m1, 0 ; V
pshuflw m1, m1, 0x0 SPLATW m3, m3, 0 ; a
pshuflw m3, m3, 0x0
punpcklqdq m0, m0 ; splat H (words)
punpcklqdq m1, m1 ; splat V (words)
punpcklqdq m3, m3 ; splat a (words)
%elif cpuflag(mmx2)
pshufw m0, m0, 0x0
pshufw m1, m1, 0x0
pshufw m3, m3, 0x0
%elif cpuflag(mmx)
punpcklwd m0, m0
punpcklwd m1, m1
punpcklwd m3, m3
punpckldq m0, m0
punpckldq m1, m1
punpckldq m3, m3
%endif
%ifidn %1, svq3 %ifidn %1, svq3
SWAP 0, 1 SWAP 0, 1
%endif %endif
@ -626,10 +584,8 @@ cglobal pred8x8_plane, 2,9,7
paddw m0, m1 paddw m0, m1
%if notcpuflag(ssse3) %if notcpuflag(ssse3)
%if cpuflag(sse2) ; mmsize == 16 %if cpuflag(mmx2)
pshuflw m1, m0, 0xE PSHUFLW m1, m0, 0xE
%elif cpuflag(mmx2)
pshufw m1, m0, 0xE
%elif cpuflag(mmx) %elif cpuflag(mmx)
mova m1, m0 mova m1, m0
psrlq m1, 32 psrlq m1, 32
@ -637,10 +593,8 @@ cglobal pred8x8_plane, 2,9,7
paddw m0, m1 paddw m0, m1
%endif ; !ssse3 %endif ; !ssse3
%if cpuflag(sse2) %if cpuflag(mmx2)
pshuflw m1, m0, 0x1 PSHUFLW m1, m0, 0x1
%elif cpuflag(mmx2)
pshufw m1, m0, 0x1
%elif cpuflag(mmx) %elif cpuflag(mmx)
mova m1, m0 mova m1, m0
psrlq m1, 16 psrlq m1, 16
@ -711,25 +665,9 @@ cglobal pred8x8_plane, 2,9,7
movd m1, r5d movd m1, r5d
movd m3, r3d movd m3, r3d
%if cpuflag(sse2) SPLATW m0, m0, 0 ; H
pshuflw m0, m0, 0x0 SPLATW m1, m1, 0 ; V
pshuflw m1, m1, 0x0 SPLATW m3, m3, 0 ; a
pshuflw m3, m3, 0x0
punpcklqdq m0, m0 ; splat H (words)
punpcklqdq m1, m1 ; splat V (words)
punpcklqdq m3, m3 ; splat a (words)
%elif cpuflag(mmx2)
pshufw m0, m0, 0x0
pshufw m1, m1, 0x0
pshufw m3, m3, 0x0
%elif cpuflag(mmx)
punpcklwd m0, m0
punpcklwd m1, m1
punpcklwd m3, m3
punpckldq m0, m0
punpckldq m1, m1
punpckldq m3, m3
%endif
%if mmsize == 8 %if mmsize == 8
mova m2, m0 mova m2, m0
%endif %endif
@ -815,24 +753,8 @@ cglobal pred8x8_horizontal, 2,3
mova m2, [pb_3] mova m2, [pb_3]
%endif %endif
.loop: .loop:
movd m0, [r0+r1*0-4] SPLATB_LOAD m0, r0+r1*0-1, m2
movd m1, [r0+r1*1-4] SPLATB_LOAD m1, r0+r1*1-1, m2
%if cpuflag(ssse3)
pshufb m0, m2
pshufb m1, m2
%else
punpcklbw m0, m0
punpcklbw m1, m1
%if cpuflag(mmx2)
pshufw m0, m0, 0xff
pshufw m1, m1, 0xff
%else
punpckhwd m0, m0
punpckhwd m1, m1
punpckhdq m0, m0
punpckhdq m1, m1
%endif
%endif
mova [r0+r1*0], m0 mova [r0+r1*0], m0
mova [r0+r1*1], m1 mova [r0+r1*1], m1
lea r0, [r0+r1*2] lea r0, [r0+r1*2]
@ -1000,15 +922,8 @@ cglobal pred8x8_tm_vp8, 2,6
sub r3d, r4d sub r3d, r4d
movd mm2, r2d movd mm2, r2d
movd mm4, r3d movd mm4, r3d
%if cpuflag(mmx2) SPLATW mm2, mm2, 0
pshufw mm2, mm2, 0 SPLATW mm4, mm4, 0
pshufw mm4, mm4, 0
%else
punpcklwd mm2, mm2
punpcklwd mm4, mm4
punpckldq mm2, mm2
punpckldq mm4, mm4
%endif
movq mm3, mm2 movq mm3, mm2
movq mm5, mm4 movq mm5, mm4
paddw mm2, mm0 paddw mm2, mm0