mirror of https://git.ffmpeg.org/ffmpeg.git
flac/x86: add ff_flac_lpc_32_xop()
Tested on an AMD FX 6300:
679081 decicycles in ff_flac_lpc_32_xop, 32768 runs
774425 decicycles in ff_flac_lpc_32_sse4, 32768 runs

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
23a8c63452
commit
e87974bc00
|
@ -24,7 +24,8 @@
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
INIT_XMM sse4
|
%macro LPC_32 1
|
||||||
|
INIT_XMM %1
|
||||||
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
|
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
|
||||||
sub lend, pred_orderd
|
sub lend, pred_orderd
|
||||||
jle .ret
|
jle .ret
|
||||||
|
@ -43,25 +44,21 @@ ALIGN 16
|
||||||
test jq, jq
|
test jq, jq
|
||||||
jz .end_order
|
jz .end_order
|
||||||
.loop_order:
|
.loop_order:
|
||||||
pmuldq m0, m1
|
pmacsdql m2, m0, m1, m2
|
||||||
paddq m2, m0
|
|
||||||
movd m0, [decodedq+jq*4]
|
movd m0, [decodedq+jq*4]
|
||||||
pmuldq m1, m0
|
pmacsdql m3, m1, m0, m3
|
||||||
paddq m3, m1
|
|
||||||
movd m1, [coeffsq+jq*4]
|
movd m1, [coeffsq+jq*4]
|
||||||
inc jq
|
inc jq
|
||||||
jl .loop_order
|
jl .loop_order
|
||||||
.end_order:
|
.end_order:
|
||||||
pmuldq m0, m1
|
pmacsdql m2, m0, m1, m2
|
||||||
paddq m2, m0
|
|
||||||
psrlq m2, m4
|
psrlq m2, m4
|
||||||
movd m0, [decodedq]
|
movd m0, [decodedq]
|
||||||
paddd m0, m2
|
paddd m0, m2
|
||||||
movd [decodedq], m0
|
movd [decodedq], m0
|
||||||
sub lend, 2
|
sub lend, 2
|
||||||
jl .ret
|
jl .ret
|
||||||
pmuldq m1, m0
|
pmacsdql m3, m1, m0, m3
|
||||||
paddq m3, m1
|
|
||||||
psrlq m3, m4
|
psrlq m3, m4
|
||||||
movd m1, [decodedq+4]
|
movd m1, [decodedq+4]
|
||||||
paddd m1, m3
|
paddd m1, m3
|
||||||
|
@ -69,3 +66,9 @@ ALIGN 16
|
||||||
jg .loop_sample
|
jg .loop_sample
|
||||||
.ret:
|
.ret:
|
||||||
REP_RET
|
REP_RET
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%if HAVE_XOP_EXTERNAL
|
||||||
|
LPC_32 xop
|
||||||
|
%endif
|
||||||
|
LPC_32 sse4
|
||||||
|
|
|
@ -24,6 +24,8 @@
|
||||||
|
|
||||||
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
|
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
|
||||||
int qlevel, int len);
|
int qlevel, int len);
|
||||||
|
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
|
||||||
|
int qlevel, int len);
|
||||||
|
|
||||||
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
|
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
|
||||||
int bps)
|
int bps)
|
||||||
|
@ -35,5 +37,9 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
|
||||||
if (bps > 16 && CONFIG_FLAC_DECODER)
|
if (bps > 16 && CONFIG_FLAC_DECODER)
|
||||||
c->lpc = ff_flac_lpc_32_sse4;
|
c->lpc = ff_flac_lpc_32_sse4;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_XOP(cpu_flags)) {
|
||||||
|
if (bps > 16)
|
||||||
|
c->lpc = ff_flac_lpc_32_xop;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue