flac/x86: add ff_flac_lpc_32_xop()

Benchmarked on an AMD FX 6300 (lower is better; the XOP version needs about 12% fewer cycles than the SSE4 one):

679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Almer 2014-02-08 02:54:51 -03:00 committed by Michael Niedermayer
parent 23a8c63452
commit e87974bc00
2 changed files with 18 additions and 9 deletions

View File

@@ -24,7 +24,8 @@
SECTION .text SECTION .text
INIT_XMM sse4 %macro LPC_32 1
INIT_XMM %1
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
sub lend, pred_orderd sub lend, pred_orderd
jle .ret jle .ret
@@ -43,25 +44,21 @@ ALIGN 16
test jq, jq test jq, jq
jz .end_order jz .end_order
.loop_order: .loop_order:
pmuldq m0, m1 pmacsdql m2, m0, m1, m2
paddq m2, m0
movd m0, [decodedq+jq*4] movd m0, [decodedq+jq*4]
pmuldq m1, m0 pmacsdql m3, m1, m0, m3
paddq m3, m1
movd m1, [coeffsq+jq*4] movd m1, [coeffsq+jq*4]
inc jq inc jq
jl .loop_order jl .loop_order
.end_order: .end_order:
pmuldq m0, m1 pmacsdql m2, m0, m1, m2
paddq m2, m0
psrlq m2, m4 psrlq m2, m4
movd m0, [decodedq] movd m0, [decodedq]
paddd m0, m2 paddd m0, m2
movd [decodedq], m0 movd [decodedq], m0
sub lend, 2 sub lend, 2
jl .ret jl .ret
pmuldq m1, m0 pmacsdql m3, m1, m0, m3
paddq m3, m1
psrlq m3, m4 psrlq m3, m4
movd m1, [decodedq+4] movd m1, [decodedq+4]
paddd m1, m3 paddd m1, m3
@@ -69,3 +66,9 @@ ALIGN 16
jg .loop_sample jg .loop_sample
.ret: .ret:
REP_RET REP_RET
%endmacro
%if HAVE_XOP_EXTERNAL
LPC_32 xop
%endif
LPC_32 sse4

View File

@@ -24,6 +24,8 @@
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order, void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len); int qlevel, int len);
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
int bps) int bps)
@@ -35,5 +37,9 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
if (bps > 16 && CONFIG_FLAC_DECODER) if (bps > 16 && CONFIG_FLAC_DECODER)
c->lpc = ff_flac_lpc_32_sse4; c->lpc = ff_flac_lpc_32_sse4;
} }
if (EXTERNAL_XOP(cpu_flags)) {
if (bps > 16)
c->lpc = ff_flac_lpc_32_xop;
}
#endif #endif
} }