mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-27 09:52:17 +00:00
avfilter/x86/af_afir: add FMA3 SIMD
This commit is contained in:
parent
5f810435c2
commit
c5effe7d3d
@ -67,3 +67,30 @@ INIT_XMM sse3
|
||||
FCMUL_ADD
|
||||
INIT_YMM avx
|
||||
FCMUL_ADD
|
||||
|
||||
;-----------------------------------------------------------------------
; FMA3 variant of fcmul_add.
; C prototype (from the accompanying init file):
;   void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
;                          ptrdiff_t len);
; Treats t and c as interleaved (re, im) float pairs, multiplies them as
; complex numbers and accumulates the products into sum. After the vector
; loop, one extra scalar float is multiplied and accumulated.
; NOTE(review): movaps is used on all three buffers, so they presumably
; must be mmsize-aligned, and len*8 presumably must be a multiple of
; mmsize for the loop to end exactly at index 0 -- confirm with callers.
;-----------------------------------------------------------------------
%if HAVE_FMA3_EXTERNAL
|
||||
INIT_YMM fma3                      ; x86inc: use ymm registers / FMA3 instruction set
|
||||
cglobal fcmul_add, 4,4,4, sum, t, c, len  ; named args: sum, t, c, len
|
||||
shl lend, 3                        ; len *= 8: byte size of len (re, im) float pairs
|
||||
add tq, lenq                       ; point t at the end of its data
|
||||
add cq, lenq                       ; point c at the end of its data
|
||||
add sumq, lenq                     ; point sum at the end of its data
|
||||
neg lenq                           ; len = negative byte offset, counts up towards 0
|
||||
.loop:
|
||||
movaps m0, [tq + lenq]             ; m0 = t:  (t.re, t.im, ...)
|
||||
movaps m1, [cq + lenq]             ; m1 = c:  (c.re, c.im, ...)
|
||||
vpermilps m3, m0, 177              ; 177 = 10_11_00_01b: swap each pair -> (t.im, t.re, ...)
|
||||
vpermilps m2, m1, 160              ; 160 = 10_10_00_00b: duplicate even lanes -> (c.re, c.re, ...)
|
||||
vpermilps m1, m1, 245              ; 245 = 11_11_01_01b: duplicate odd lanes -> (c.im, c.im, ...)
|
||||
mulps m1, m1, m3                   ; m1 = (c.im*t.im, c.im*t.re, ...)
|
||||
vfmaddsub132ps m0, m1, m2          ; m0 = m0*m2 -/+ m1: even lanes subtract, odd lanes add
                                   ;    = (t.re*c.re - t.im*c.im, t.im*c.re + t.re*c.im, ...)
|
||||
addps m0, m0, [sumq + lenq]        ; add the existing accumulator values
|
||||
movaps [sumq + lenq], m0           ; store back to sum
|
||||
add lenq, mmsize                   ; advance by one vector register worth of bytes
|
||||
jl .loop                           ; keep going while the offset is still negative
|
||||
movss xm0, [tq + lenq]             ; scalar tail: one float located at the final offset
|
||||
mulss xm0, [cq + lenq]             ; plain (real) product t[x] * c[x]
|
||||
addss xm0, [sumq + lenq]           ; accumulate with sum[x]
|
||||
movss [sumq + lenq], xm0           ; store the updated scalar
|
||||
RET
|
||||
%endif
|
||||
|
@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
|
||||
ptrdiff_t len);
|
||||
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
|
||||
ptrdiff_t len);
|
||||
void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
|
||||
ptrdiff_t len);
|
||||
|
||||
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
|
||||
{
|
||||
@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
s->fcmul_add = ff_fcmul_add_avx;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
s->fcmul_add = ff_fcmul_add_fma3;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user