mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-25 08:42:39 +00:00
x86/swr: add ff_resample_{common, linear}_int16_xop
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
1a69224f44
commit
c45b7f0d80
@ -176,8 +176,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
|||||||
.inner_loop:
|
.inner_loop:
|
||||||
movu m1, [srcq+min_filter_count_x4q*1]
|
movu m1, [srcq+min_filter_count_x4q*1]
|
||||||
%ifidn %1, int16
|
%ifidn %1, int16
|
||||||
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
|
||||||
paddd m0, m1
|
|
||||||
%else ; float/double
|
%else ; float/double
|
||||||
%if cpuflag(fma4) || cpuflag(fma3)
|
%if cpuflag(fma4) || cpuflag(fma3)
|
||||||
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
||||||
@ -190,14 +189,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
|||||||
js .inner_loop
|
js .inner_loop
|
||||||
|
|
||||||
%ifidn %1, int16
|
%ifidn %1, int16
|
||||||
%if mmsize == 16
|
HADDD m0, m1
|
||||||
pshufd m1, m0, q0032
|
|
||||||
paddd m0, m1
|
|
||||||
pshufd m1, m0, q0001
|
|
||||||
%else ; mmsize == 8
|
|
||||||
pshufw m1, m0, q0032
|
|
||||||
%endif
|
|
||||||
paddd m0, m1
|
|
||||||
psrad m0, 15
|
psrad m0, 15
|
||||||
add fracd, dst_incr_modd
|
add fracd, dst_incr_modd
|
||||||
packssdw m0, m0
|
packssdw m0, m0
|
||||||
@ -427,10 +419,15 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
|||||||
.inner_loop:
|
.inner_loop:
|
||||||
movu m1, [srcq+min_filter_count_x4q*1]
|
movu m1, [srcq+min_filter_count_x4q*1]
|
||||||
%ifidn %1, int16
|
%ifidn %1, int16
|
||||||
|
%if cpuflag(xop)
|
||||||
|
vpmadcswd m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
||||||
|
vpmadcswd m0, m1, [filter1q+min_filter_count_x4q*1], m0
|
||||||
|
%else
|
||||||
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
|
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
|
||||||
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
|
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
|
||||||
paddd m2, m3
|
paddd m2, m3
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
|
%endif ; cpuflag
|
||||||
%else ; float/double
|
%else ; float/double
|
||||||
%if cpuflag(fma4) || cpuflag(fma3)
|
%if cpuflag(fma4) || cpuflag(fma3)
|
||||||
fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
||||||
@ -447,18 +444,21 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
|||||||
|
|
||||||
%ifidn %1, int16
|
%ifidn %1, int16
|
||||||
%if mmsize == 16
|
%if mmsize == 16
|
||||||
|
%if cpuflag(xop)
|
||||||
|
vphadddq m2, m2
|
||||||
|
vphadddq m0, m0
|
||||||
|
%endif
|
||||||
pshufd m3, m2, q0032
|
pshufd m3, m2, q0032
|
||||||
pshufd m1, m0, q0032
|
pshufd m1, m0, q0032
|
||||||
paddd m2, m3
|
paddd m2, m3
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
pshufd m3, m2, q0001
|
|
||||||
pshufd m1, m0, q0001
|
|
||||||
%else ; mmsize == 8
|
|
||||||
pshufw m3, m2, q0032
|
|
||||||
pshufw m1, m0, q0032
|
|
||||||
%endif
|
%endif
|
||||||
|
%if notcpuflag(xop)
|
||||||
|
PSHUFLW m3, m2, q0032
|
||||||
|
PSHUFLW m1, m0, q0032
|
||||||
paddd m2, m3
|
paddd m2, m3
|
||||||
paddd m0, m1
|
paddd m0, m1
|
||||||
|
%endif
|
||||||
psubd m2, m0
|
psubd m2, m0
|
||||||
; This is probably a really bad idea on atom and other machines with a
|
; This is probably a really bad idea on atom and other machines with a
|
||||||
; long transfer latency between GPRs and XMMs (atom). However, it does
|
; long transfer latency between GPRs and XMMs (atom). However, it does
|
||||||
@ -591,4 +591,10 @@ RESAMPLE_FNS int16, 2, 1
|
|||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
RESAMPLE_FNS int16, 2, 1
|
RESAMPLE_FNS int16, 2, 1
|
||||||
|
%if HAVE_XOP_EXTERNAL
|
||||||
|
INIT_XMM xop
|
||||||
|
RESAMPLE_FNS int16, 2, 1
|
||||||
|
%endif
|
||||||
|
|
||||||
|
INIT_XMM sse2
|
||||||
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||||
|
@ -35,6 +35,7 @@ int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \
|
|||||||
|
|
||||||
RESAMPLE_FUNCS(int16, mmxext);
|
RESAMPLE_FUNCS(int16, mmxext);
|
||||||
RESAMPLE_FUNCS(int16, sse2);
|
RESAMPLE_FUNCS(int16, sse2);
|
||||||
|
RESAMPLE_FUNCS(int16, xop);
|
||||||
RESAMPLE_FUNCS(float, sse);
|
RESAMPLE_FUNCS(float, sse);
|
||||||
RESAMPLE_FUNCS(float, avx);
|
RESAMPLE_FUNCS(float, avx);
|
||||||
RESAMPLE_FUNCS(float, fma3);
|
RESAMPLE_FUNCS(float, fma3);
|
||||||
@ -73,4 +74,8 @@ void swresample_dsp_x86_init(ResampleContext *c)
|
|||||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
|
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
|
||||||
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
|
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
|
||||||
}
|
}
|
||||||
|
if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) {
|
||||||
|
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_xop;
|
||||||
|
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_xop;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user