From c90762766a501b39c353f3b0bce9279e447a0d96 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Sun, 26 Aug 2007 08:31:55 +0000
Subject: [PATCH] optimize 1st horizontal lifting step

Originally committed as revision 10231 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/i386/snowdsp_mmx.c | 56 ++++++++++++++---------------------
 1 file changed, 22 insertions(+), 34 deletions(-)

diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index e23d264c7a..fc26537f9b 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -42,8 +42,10 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
         i = 0;
         asm volatile(
             "pcmpeqd %%xmm7, %%xmm7 \n\t"
-            "psllw $15, %%xmm7 \n\t"
-            "psrlw $14, %%xmm7 \n\t"
+            "pcmpeqd %%xmm3, %%xmm3 \n\t"
+            "psllw $1, %%xmm3 \n\t"
+            "paddw %%xmm7, %%xmm3 \n\t"
+            "psllw $13, %%xmm3 \n\t"
         ::);
         for(; i>W_DS);
         asm volatile(
             "pcmpeqw %%mm7, %%mm7 \n\t"
-            "psllw $15, %%mm7 \n\t"
-            "psrlw $14, %%mm7 \n\t"
+            "pcmpeqw %%mm3, %%mm3 \n\t"
+            "psllw $1, %%mm3 \n\t"
+            "paddw %%mm7, %%mm3 \n\t"
+            "psllw $13, %%mm3 \n\t"
         ::);
         for(; i> W_BS);
         asm volatile(
-            "psllw $14, %%mm7 \n\t"
+            "psllw $15, %%mm7 \n\t"
             "pcmpeqw %%mm6, %%mm6 \n\t"
             "psrlw $13, %%mm6 \n\t"
             "paddw %%mm7, %%mm6 \n\t"