diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 40f1c9f053..9dee577e1d 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -171,22 +171,6 @@ PRED4x4_HD
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
 ;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
-    movhlps %2, %1
-    paddd %1, %2
-    pshuflw %2, %1, 0xE
-    paddd %1, %2
-%else
-    pshufw %2, %1, 0xE
-    paddd %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
-    pmaddwd %1, [pw_1]
-    HADDD %1, %2
-%endmacro
 
 INIT_MMX mmxext
 cglobal pred4x4_dc_10, 3, 3
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cadf63..67d7905132 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
 %endif
 %endmacro
 
+%macro HADDD 2 ; sum, junk: horizontally add the dwords of %1, total ends up in its low dword; %2 is scratch and gets overwritten
+%if sizeof%1 == 32
+%define %2 xmm%2
+    vextracti128 %2, %1, 1 ; copy the upper 128 bits of the 256-bit source into the scratch register
+%define %1 xmm%1
+    paddd %1, %2 ; fold upper half onto lower half; from here on both names refer to the xmm aliases
+%endif ; sizeof%1 == 32
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+    vphadddq %1, %1 ; XOP: add each pair of adjacent dwords into a qword
+%endif
+    movhlps %2, %1 ; bring the high 64 bits of %1 down into the low half of %2
+    paddd %1, %2
+%endif ; mmsize >= 16
+%if notcpuflag(xop) || sizeof%1 != 16
+    PSHUFLW %2, %1, q0032 ; move dword 1 into dword 0 (same selector as the 0xE immediate used by the old per-file macro)
+    paddd %1, %2
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp: horizontally add the words of reg, total ends up in its low dword; tmp gets overwritten
+%if cpuflag(xop) && sizeof%1 == 16
+    vphaddwq %1, %1 ; XOP: add each group of four adjacent words into a qword
+    movhlps %2, %1 ; bring the high qword sum down next to the low one
+    paddd %1, %2
+%else ; no XOP, or register is not 128 bits wide
+    pmaddwd %1, [pw_1] ; NOTE(review): pw_1 is defined outside this hunk, presumably packed words of 1, so this sums adjacent word pairs into dwords
+    HADDD %1, %2 ; then reduce the resulting dwords
+%endif
+%endmacro
+
 %macro PALIGNR 4-5
 %if cpuflag(ssse3)
 %if %0==5