From c03ce51dfb4647e362e09aacb9ec238e4691ea18 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Tue, 7 Feb 2006 07:35:03 +0000 Subject: [PATCH] 11% faster put_h264_qpel16_v_lowpass_mmx2 Originally committed as revision 4950 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/h264dsp_mmx.c | 38 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c index 19fb508b93..39daa7f23e 100644 --- a/libavcodec/i386/h264dsp_mmx.c +++ b/libavcodec/i386/h264dsp_mmx.c @@ -591,11 +591,11 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i );\ }\ \ -static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - int h= 2;\ +static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + int w= 2;\ src -= 2*srcStride;\ \ - while(h--){\ + while(w--){\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ @@ -626,8 +626,24 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ : "memory"\ );\ - src += 4-13*srcStride;\ - dst += 4-8*dstStride;\ + if(h==16){\ + asm volatile(\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + }\ + src += 4-(h+5)*srcStride;\ + dst += 4-h*dstStride;\ }\ }\ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ @@ -731,13 +747,13 @@ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int dst += 8 - size*dstStride;\ }while(w--);\ }\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ +}\ static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ }\ \ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\