From 76ed71a72bffb45027923e4da5f6fc6a97bfb218 Mon Sep 17 00:00:00 2001 From: James Almer Date: Wed, 16 Apr 2014 20:15:35 -0300 Subject: [PATCH] x86: move horizontal add macros to x86util Also port relevant AVX2/XOP optimizations from x264 with permission to relicense to LGPL from the corresponding authors Signed-off-by: James Almer Reviewed-by: "Ronald S. Bultje" Signed-off-by: Michael Niedermayer --- libavcodec/x86/h264_intrapred_10bit.asm | 16 ------------ libavutil/x86/x86util.asm | 33 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm index 40f1c9f053..9dee577e1d 100644 --- a/libavcodec/x86/h264_intrapred_10bit.asm +++ b/libavcodec/x86/h264_intrapred_10bit.asm @@ -171,22 +171,6 @@ PRED4x4_HD ;----------------------------------------------------------------------------- ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride) ;----------------------------------------------------------------------------- -%macro HADDD 2 ; sum junk -%if mmsize == 16 - movhlps %2, %1 - paddd %1, %2 - pshuflw %2, %1, 0xE - paddd %1, %2 -%else - pshufw %2, %1, 0xE - paddd %1, %2 -%endif -%endmacro - -%macro HADDW 2 - pmaddwd %1, [pw_1] - HADDD %1, %2 -%endmacro INIT_MMX mmxext cglobal pred4x4_dc_10, 3, 3 diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index df58cadf63..67d7905132 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -273,6 +273,39 @@ %endif %endmacro +%macro HADDD 2 ; sum junk +%if sizeof%1 == 32 +%define %2 xmm%2 + vextracti128 %2, %1, 1 +%define %1 xmm%1 + paddd %1, %2 +%endif +%if mmsize >= 16 +%if cpuflag(xop) && sizeof%1 == 16 + vphadddq %1, %1 +%endif + movhlps %2, %1 + paddd %1, %2 +%endif +%if notcpuflag(xop) || sizeof%1 != 16 + PSHUFLW %2, %1, q0032 + paddd %1, %2 +%endif +%undef %1 +%undef %2 +%endmacro + +%macro HADDW 2 ; reg, tmp +%if cpuflag(xop) && sizeof%1 == 16 + vphaddwq %1, %1 + movhlps %2, %1 + paddd %1, %2 +%else + pmaddwd %1, [pw_1] + HADDD %1, %2 +%endif +%endmacro + %macro PALIGNR 4-5 %if cpuflag(ssse3) %if %0==5