From 559738eff33f588af595da02be0962fbf4af30ea Mon Sep 17 00:00:00 2001
From: Ramiro Polla
Date: Sun, 31 Oct 2010 13:13:53 +0000
Subject: [PATCH] dsputil_mmx: prefer xmm registers below xmm6 when they are available

Originally committed as revision 25606 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/x86/dsputil_mmx.c    | 16 ++++++++--------
 libavcodec/x86/dsputilenc_mmx.c | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f5fe0f16a9..d6af84c48b 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2012,8 +2012,8 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
     "1: \n"\
     "movaps (%3,%0), %%xmm0 \n"\
     stereo("movaps %%xmm0, %%xmm1 \n")\
-    "mulps %%xmm6, %%xmm0 \n"\
-    stereo("mulps %%xmm7, %%xmm1 \n")\
+    "mulps %%xmm4, %%xmm0 \n"\
+    stereo("mulps %%xmm5, %%xmm1 \n")\
     "lea 1024(%3,%0), %1 \n"\
     "mov %5, %2 \n"\
     "2: \n"\
@@ -2051,12 +2051,12 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c
     __asm__ volatile(
     "1: \n"
     "sub $8, %0 \n"
-    "movss (%2,%0), %%xmm6 \n"
-    "movss 4(%2,%0), %%xmm7 \n"
-    "shufps $0, %%xmm6, %%xmm6 \n"
-    "shufps $0, %%xmm7, %%xmm7 \n"
-    "movaps %%xmm6, (%1,%0,4) \n"
-    "movaps %%xmm7, 16(%1,%0,4) \n"
+    "movss (%2,%0), %%xmm4 \n"
+    "movss 4(%2,%0), %%xmm5 \n"
+    "shufps $0, %%xmm4, %%xmm4 \n"
+    "shufps $0, %%xmm5, %%xmm5 \n"
+    "movaps %%xmm4, (%1,%0,4) \n"
+    "movaps %%xmm5, 16(%1,%0,4) \n"
     "jg 1b \n"
     :"+&r"(j)
     :"r"(matrix_simd), "r"(matrix)
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 1ef5a45ffa..f02d1cad75 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -61,16 +61,16 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
 static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
 {
     __asm__ volatile(
-    "pxor %%xmm7, %%xmm7 \n\t"
+    "pxor %%xmm4, %%xmm4 \n\t"
     "movq (%0), %%xmm0 \n\t"
     "movq (%0, %2), %%xmm1 \n\t"
     "movq (%0, %2,2), %%xmm2 \n\t"
     "movq (%0, %3), %%xmm3 \n\t"
     "lea (%0,%2,4), %0 \n\t"
-    "punpcklbw %%xmm7, %%xmm0 \n\t"
-    "punpcklbw %%xmm7, %%xmm1 \n\t"
-    "punpcklbw %%xmm7, %%xmm2 \n\t"
-    "punpcklbw %%xmm7, %%xmm3 \n\t"
+    "punpcklbw %%xmm4, %%xmm0 \n\t"
+    "punpcklbw %%xmm4, %%xmm1 \n\t"
+    "punpcklbw %%xmm4, %%xmm2 \n\t"
+    "punpcklbw %%xmm4, %%xmm3 \n\t"
     "movdqa %%xmm0, (%1) \n\t"
     "movdqa %%xmm1, 16(%1) \n\t"
     "movdqa %%xmm2, 32(%1) \n\t"
@@ -79,10 +79,10 @@ static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size
     "movq (%0, %2), %%xmm1 \n\t"
     "movq (%0, %2,2), %%xmm2 \n\t"
     "movq (%0, %3), %%xmm3 \n\t"
-    "punpcklbw %%xmm7, %%xmm0 \n\t"
-    "punpcklbw %%xmm7, %%xmm1 \n\t"
-    "punpcklbw %%xmm7, %%xmm2 \n\t"
-    "punpcklbw %%xmm7, %%xmm3 \n\t"
+    "punpcklbw %%xmm4, %%xmm0 \n\t"
+    "punpcklbw %%xmm4, %%xmm1 \n\t"
+    "punpcklbw %%xmm4, %%xmm2 \n\t"
+    "punpcklbw %%xmm4, %%xmm3 \n\t"
     "movdqa %%xmm0, 64(%1) \n\t"
     "movdqa %%xmm1, 80(%1) \n\t"
     "movdqa %%xmm2, 96(%1) \n\t"
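
Note (not part of the patch): a minimal standalone sketch of the register-selection idea. The likely motivation for preferring xmm0-xmm5 is that the Win64 calling convention treats xmm6-xmm15 as callee-saved, so inline asm whose scratch values stay below xmm6 never touches a register the surrounding compiled code expects to be preserved. The helper name zero_block_sse2 and the 64-byte block below are hypothetical, not taken from the patch.

/* Illustrative sketch, assuming the Win64 callee-saved-register rationale. */
#include <stdint.h>

static void zero_block_sse2(int16_t *block)   /* hypothetical: 32 int16_t = 64 bytes */
{
    __asm__ volatile(
        "pxor   %%xmm0, %%xmm0 \n\t"   /* scratch register stays below xmm6      */
        "movdqu %%xmm0,   (%0) \n\t"   /* unaligned stores so the sketch works   */
        "movdqu %%xmm0, 16(%0) \n\t"   /* without requiring an aligned buffer    */
        "movdqu %%xmm0, 32(%0) \n\t"
        "movdqu %%xmm0, 48(%0) \n\t"
        : : "r"(block)
        : "xmm0", "memory");
}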