diff --git a/libavcodec/x86/pngdsp-init.c b/libavcodec/x86/pngdsp-init.c
index 9c7d696aa0..136e92eed0 100644
--- a/libavcodec/x86/pngdsp-init.c
+++ b/libavcodec/x86/pngdsp-init.c
@@ -29,16 +29,23 @@ void ff_add_png_paeth_prediction_ssse3(uint8_t *dst, uint8_t *src,
                                        uint8_t *top, int w, int bpp);
 void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1,
                           uint8_t *src2, int w);
+void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
+                          uint8_t *src2, int w);
 
 void ff_pngdsp_init_x86(PNGDSPContext *dsp)
 {
 #if HAVE_YASM
     int flags = av_get_cpu_flags();
 
+#if ARCH_X86_32
     if (flags & AV_CPU_FLAG_MMX)
         dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
+#endif
     if (flags & AV_CPU_FLAG_MMX2)
         dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
+    /* Assigned after the MMX version so SSE2 wins when both flags are set. */
+    if (flags & AV_CPU_FLAG_SSE2)
+        dsp->add_bytes_l2         = ff_add_bytes_l2_sse2;
     if (flags & AV_CPU_FLAG_SSSE3)
         dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
 #endif
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 92425ad099..bff76e0c0d 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -53,6 +53,24 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
     cmp                 iq, waq
     jl .loop_v
 
+%if mmsize == 16
+    ; 8-byte loop: adds one quadword per iteration with MMX registers for
+    ; whatever the wide loop above left over. waq still holds that loop's
+    ; bound, so reload it from wq (expected to hold the full width; set up
+    ; before this hunk) before masking, or this loop could never run.
+    mov                waq, wq
+    and                waq, ~7
+    jmp .end_l
+.loop_l:
+    movq               mm0, [src1q+iq]
+    paddb              mm0, [src2q+iq]
+    movq  [dstq+iq       ], mm0
+    add                 iq, 8
+.end_l:
+    cmp                 iq, waq
+    jl .loop_l
+%endif
+
     ; scalar loop for leftover
     jmp .end_s
 .loop_s:
@@ -66,8 +84,13 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 ADD_BYTES_FN 0
+%endif
+
+INIT_XMM sse2
+ADD_BYTES_FN 2
 
 %macro ADD_PAETH_PRED_FN 1
 cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr