diff --git a/libavcodec/i386/idct_mmx_xvid.c b/libavcodec/i386/idct_mmx_xvid.c index c27a3546a0..943c50f92b 100644 --- a/libavcodec/i386/idct_mmx_xvid.c +++ b/libavcodec/i386/idct_mmx_xvid.c @@ -72,28 +72,11 @@ //----------------------------------------------------------------------------- -static const int16_t one_corr[4] attribute_used __attribute__ ((aligned(8))) = { - 1,1,1,1}; -static const int32_t round_inv_row[2] attribute_used __attribute__ ((aligned(8))) = { - RND_INV_ROW, RND_INV_ROW}; -static const int16_t round_inv_col[4] attribute_used __attribute__ ((aligned(8))) = { - RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL}; -static const int16_t round_inv_corr[4] attribute_used __attribute__ ((aligned(8))) = { - RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR}; -static const int32_t round_frw_row[2] attribute_used __attribute__ ((aligned(8))) = { - RND_FRW_ROW, RND_FRW_ROW}; -static const int16_t tg_1_16[4] attribute_used __attribute__ ((aligned(8))) = { - 13036,13036,13036,13036}; // tg * (2<<16) + 0.5 -static const int16_t tg_2_16[4] attribute_used __attribute__ ((aligned(8))) = { - 27146,27146,27146,27146}; // tg * (2<<16) + 0.5 -static const int16_t tg_3_16[4] attribute_used __attribute__ ((aligned(8))) = { - -21746,-21746,-21746,-21746}; // tg * (2<<16) + 0.5 -static const int16_t cos_4_16[4] attribute_used __attribute__ ((aligned(8))) = { - -19195,-19195,-19195,-19195}; // cos * (2<<16) + 0.5 -static const int16_t ocos_4_16[4] attribute_used __attribute__ ((aligned(8))) = { +static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = { + 13036,13036,13036,13036, // row 0 (was tg_1_16): tg * (1<<16) + 0.5 + 27146,27146,27146,27146, // row 1 (was tg_2_16): tg * (1<<16) + 0.5 + -21746,-21746,-21746,-21746, // row 2 (was tg_3_16): tg * (1<<16) + 0.5, wrapped to int16 (= tg-1) 23170,23170,23170,23170}; // cos * (2<<15) + 0.5 -static const int16_t otg_3_16[4] attribute_used __attribute__ ((aligned(8))) = { - 21895,21895,21895,21895}; // tg * (2<<16) + 0.5 static const int32_t rounder_0[2*8] attribute_used 
__attribute__ ((aligned(8))) = { 65536,65536, @@ -414,12 +397,12 @@ static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8 //----------------------------------------------------------------------------- #define DCT_8_INV_COL(A1,A2)\ - "movq tg_3_16,%%mm0\n\t"\ + "movq 2*8(%3),%%mm0\n\t"/* row 2 of table %3 (was tg_3_16)*/\ "movq 16*3+" #A1 ",%%mm3\n\t"\ "movq %%mm0,%%mm1 \n\t"/* tg_3_16*/\ "movq 16*5+" #A1 ",%%mm5\n\t"\ "pmulhw %%mm3,%%mm0 \n\t"/* x3*(tg_3_16-1)*/\ - "movq tg_1_16,%%mm4\n\t"\ + "movq (%3),%%mm4\n\t"/* row 0 of table %3 (was tg_1_16)*/\ "pmulhw %%mm5,%%mm1 \n\t"/* x5*(tg_3_16-1)*/\ "movq 16*7+" #A1 ",%%mm7\n\t"\ "movq %%mm4,%%mm2 \n\t"/* tg_1_16*/\ @@ -429,7 +412,7 @@ static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8 "pmulhw %%mm6,%%mm2 \n\t"/* x1*tg_1_16*/\ "paddsw %%mm3,%%mm1 \n\t"/* x3+x5*(tg_3_16-1)*/\ "psubsw %%mm5,%%mm0 \n\t"/* x3*tg_3_16-x5 = tm35*/\ - "movq ocos_4_16,%%mm3\n\t"\ + "movq 3*8(%3),%%mm3\n\t"/* row 3 of table %3 (was ocos_4_16)*/\ "paddsw %%mm5,%%mm1 \n\t"/* x3+x5*tg_3_16 = tp35*/\ "paddsw %%mm6,%%mm4 \n\t"/* x1+tg_1_16*x7 = tp17*/\ "psubsw %%mm7,%%mm2 \n\t"/* x1*tg_1_16-x7 = tm17*/\ @@ -439,7 +422,7 @@ static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8 "psubsw %%mm0,%%mm6 \n\t"/* tm17-tm35 = b3*/\ "psubsw %%mm1,%%mm4 \n\t"/* tp17-tp35 = t1*/\ "paddsw %%mm0,%%mm2 \n\t"/* tm17+tm35 = t2*/\ - "movq tg_2_16,%%mm7\n\t"\ + "movq 1*8(%3),%%mm7\n\t"/* row 1 of table %3 (was tg_2_16)*/\ "movq %%mm4,%%mm1 \n\t"/* t1*/\ "movq %%mm5,3*16 +" #A2 "\n\t"/* save b0*/\ "paddsw %%mm2,%%mm1 \n\t"/* t1+t2*/\ @@ -522,7 +505,7 @@ asm volatile( //# Process the columns (4 at a time) DCT_8_INV_COL(0(%0), 0(%0)) DCT_8_INV_COL(8(%0), 8(%0)) - :: "r"(block), "r"(rounder_0), "r"(tab_i_04_mmx)); + :: "r"(block), "r"(rounder_0), "r"(tab_i_04_mmx), "r"(tg_1_16)); } //----------------------------------------------------------------------------- @@ -545,6 +528,6 @@ asm volatile( //# Process the columns (4 at a time) DCT_8_INV_COL(0(%0), 0(%0)) DCT_8_INV_COL(8(%0), 8(%0)) - :: "r"(block), "r"(rounder_0), 
"r"(tab_i_04_xmm)); + :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16)); }