diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c index 96cb856e94..eb67a1e498 100644 --- a/libavcodec/i386/snowdsp_mmx.c +++ b/libavcodec/i386/snowdsp_mmx.c @@ -333,18 +333,18 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){ "movq 12(%1), %%mm6 \n\t" "paddd (%1), %%mm2 \n\t" "paddd 8(%1), %%mm6 \n\t" - "movq %%mm2, %%mm0 \n\t" - "movq %%mm6, %%mm4 \n\t" - "pslld $2, %%mm2 \n\t" - "pslld $2, %%mm6 \n\t" + "pxor %%mm0, %%mm0 \n\t" //note: the 2 xor could be avoided if we would flip the rounding direction + "pxor %%mm4, %%mm4 \n\t" "psubd %%mm2, %%mm0 \n\t" "psubd %%mm6, %%mm4 \n\t" "psrad $1, %%mm0 \n\t" "psrad $1, %%mm4 \n\t" - "movq (%0), %%mm2 \n\t" - "movq 8(%0), %%mm6 \n\t" "psubd %%mm0, %%mm2 \n\t" "psubd %%mm4, %%mm6 \n\t" + "movq (%0), %%mm0 \n\t" + "movq 8(%0), %%mm4 \n\t" + "paddd %%mm0, %%mm2 \n\t" + "paddd %%mm4, %%mm6 \n\t" "movq %%mm2, (%2) \n\t" "movq %%mm6, 8(%2) \n\t" :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])