From e87974bc00e997c5844300687a97a11e0dbf6f12 Mon Sep 17 00:00:00 2001
From: James Almer
Date: Sat, 8 Feb 2014 02:54:51 -0300
Subject: [PATCH] flac/x86: add ff_flac_lpc_32_xop()

Tested on an AMD FX 6300
679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs

Signed-off-by: James Almer
Signed-off-by: Michael Niedermayer
---
 libavcodec/x86/flacdsp.asm    | 21 ++++++++++++---------
 libavcodec/x86/flacdsp_init.c |  6 ++++++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index e28f905c6f..1a83cd8f8f 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -24,7 +24,8 @@
 
 SECTION .text
 
-INIT_XMM sse4
+%macro LPC_32 1
+INIT_XMM %1
 cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
     sub lend, pred_orderd
     jle .ret
@@ -43,25 +44,21 @@ ALIGN 16
     test jq, jq
     jz .end_order
 .loop_order:
-    pmuldq m0, m1
-    paddq m2, m0
+    pmacsdql m2, m0, m1, m2
     movd m0, [decodedq+jq*4]
-    pmuldq m1, m0
-    paddq m3, m1
+    pmacsdql m3, m1, m0, m3
     movd m1, [coeffsq+jq*4]
     inc jq
     jl .loop_order
 .end_order:
-    pmuldq m0, m1
-    paddq m2, m0
+    pmacsdql m2, m0, m1, m2
     psrlq m2, m4
     movd m0, [decodedq]
     paddd m0, m2
     movd [decodedq], m0
     sub lend, 2
     jl .ret
-    pmuldq m1, m0
-    paddq m3, m1
+    pmacsdql m3, m1, m0, m3
     psrlq m3, m4
     movd m1, [decodedq+4]
     paddd m1, m3
@@ -69,3 +66,9 @@ ALIGN 16
     jg .loop_sample
 .ret:
     REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 1a0252226a..151ce3401c 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -24,6 +24,8 @@
 
 void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
                          int qlevel, int len);
+void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
+                        int qlevel, int len);
 
 av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
                                  int bps)
@@ -35,5 +37,9 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
         if (bps > 16 && CONFIG_FLAC_DECODER)
             c->lpc = ff_flac_lpc_32_sse4;
     }
+    if (EXTERNAL_XOP(cpu_flags)) {
+        if (bps > 16)
+            c->lpc = ff_flac_lpc_32_xop;
+    }
 #endif
 }