x86/af_afir: add ff_fcmul_add_avx()

fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Tested on a Core i5 4460 @ 3.2GHz

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2019-01-02 21:09:25 -03:00
parent ba89dc27b5
commit 5402c1886b
2 changed files with 12 additions and 1 deletions

View File

@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, ptrdiff_t len)
;------------------------------------------------------------------------------
INIT_XMM sse3
%macro FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3
add tq, lenq
@@ -61,3 +61,9 @@ ALIGN 16
addss xm0, [sumq + lenq]
movss [sumq + lenq], xm0
RET
%endmacro
; Expand the FCMUL_ADD macro once per instruction set. Each INIT_* line
; selects the register width and CPU suffix used by the expansion that follows.

; SSE3 variant (XMM registers).
INIT_XMM sse3
FCMUL_ADD
; AVX variant (YMM registers); same macro body, expanded under a different
; INIT_* setting.
INIT_YMM avx
FCMUL_ADD

View File

@@ -24,6 +24,8 @@
/*
 * Prototypes for the fcmul_add routines that are implemented outside this
 * C file; both share one signature so either can be stored in
 * AudioFIRDSPContext.fcmul_add by ff_afir_init_x86().
 *
 * NOTE(review): the routine presumably accumulates a complex product of
 * t and c into sum, with len counting complex (two-float) elements -- the
 * visible assembly scales len by 8 bytes and adds into sum, but the full
 * body is not shown here; confirm against the implementation.
 */

/* SSE3 variant. */
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
/* AVX variant. */
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3;
}
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
}