x86/aacpsdsp: precompute constant factors

Inspired by the optimization done to the C version by Rémi Denis-Courmont.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2022-09-20 15:09:40 -03:00
parent 08edacc248
commit 2bcf86d53d

View File

@ -400,29 +400,32 @@ HYBRID_SYNTHESIS_DEINT
; const float (*filter)[8][2],
; ptrdiff_t stride, int n);
;*******************************************************************
%macro PS_HYBRID_ANALYSIS_LOOP 3
movu %1, [inq+mmsize*%3]
movu m1, [inq+mmsize*(5-%3)+8]
%if cpuflag(sse3)
pshufd %2, %1, q2301
pshufd m4, m1, q0123
pshufd m1, m1, q1032
pshufd m2, [filterq+nq+mmsize*%3], q2301
addsubps %2, m4
addsubps %1, m1
%else
mova m2, [filterq+nq+mmsize*%3]
mova %2, %1
%macro PS_HYBRID_ANALYSIS_IN 1
movu m0, [inq+mmsize*%1]
movu m1, [inq+mmsize*(5-%1)+8]
mova m3, m0
mova m4, m1
shufps %2, %2, q2301
shufps m3, m3, q2301
shufps m4, m4, q0123
shufps m1, m1, q1032
shufps m2, m2, q2301
%if cpuflag(sse3)
addsubps m3, m4
addsubps m0, m1
%else
xorps m4, m7
xorps m1, m7
subps %2, m4
subps %1, m1
subps m3, m4
subps m0, m1
%endif
mova [rsp+mmsize*%1*2], m3
mova [rsp+mmsize+mmsize*%1*2], m0
%endmacro
%macro PS_HYBRID_ANALYSIS_LOOP 3
mova m2, [filterq+nq+mmsize*%3]
shufps m2, m2, q2301
mova %2, [rsp+mmsize*%3*2]
mova %1, [rsp+mmsize+mmsize*%3*2]
mulps %2, m2
mulps %1, m2
%if %3
@ -432,7 +435,7 @@ HYBRID_SYNTHESIS_DEINT
%endmacro
%macro PS_HYBRID_ANALYSIS 0
cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
cglobal ps_hybrid_analysis, 5, 5, 8, 24 * 4, out, in, filter, stride, n
%if cpuflag(sse3)
%define MOVH movsd
%else
@ -443,6 +446,9 @@ cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
add filterq, nq
neg nq
mova m7, [ps_p1m1p1m1]
PS_HYBRID_ANALYSIS_IN 0
PS_HYBRID_ANALYSIS_IN 1
PS_HYBRID_ANALYSIS_IN 2
align 16
.loop: