mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-24 00:02:52 +00:00
x86/swr: add ff_resample_{common, linear}_int16_xop
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
1a69224f44
commit
c45b7f0d80
@ -176,8 +176,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||
.inner_loop:
|
||||
movu m1, [srcq+min_filter_count_x4q*1]
|
||||
%ifidn %1, int16
|
||||
pmaddwd m1, [filterq+min_filter_count_x4q*1]
|
||||
paddd m0, m1
|
||||
PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
|
||||
%else ; float/double
|
||||
%if cpuflag(fma4) || cpuflag(fma3)
|
||||
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
|
||||
@ -190,14 +189,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
|
||||
js .inner_loop
|
||||
|
||||
%ifidn %1, int16
|
||||
%if mmsize == 16
|
||||
pshufd m1, m0, q0032
|
||||
paddd m0, m1
|
||||
pshufd m1, m0, q0001
|
||||
%else ; mmsize == 8
|
||||
pshufw m1, m0, q0032
|
||||
%endif
|
||||
paddd m0, m1
|
||||
HADDD m0, m1
|
||||
psrad m0, 15
|
||||
add fracd, dst_incr_modd
|
||||
packssdw m0, m0
|
||||
@ -427,10 +419,15 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||
.inner_loop:
|
||||
movu m1, [srcq+min_filter_count_x4q*1]
|
||||
%ifidn %1, int16
|
||||
%if cpuflag(xop)
|
||||
vpmadcswd m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
||||
vpmadcswd m0, m1, [filter1q+min_filter_count_x4q*1], m0
|
||||
%else
|
||||
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
|
||||
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
|
||||
paddd m2, m3
|
||||
paddd m0, m1
|
||||
%endif ; cpuflag
|
||||
%else ; float/double
|
||||
%if cpuflag(fma4) || cpuflag(fma3)
|
||||
fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2
|
||||
@ -447,18 +444,21 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
|
||||
|
||||
%ifidn %1, int16
|
||||
%if mmsize == 16
|
||||
%if cpuflag(xop)
|
||||
vphadddq m2, m2
|
||||
vphadddq m0, m0
|
||||
%endif
|
||||
pshufd m3, m2, q0032
|
||||
pshufd m1, m0, q0032
|
||||
paddd m2, m3
|
||||
paddd m0, m1
|
||||
pshufd m3, m2, q0001
|
||||
pshufd m1, m0, q0001
|
||||
%else ; mmsize == 8
|
||||
pshufw m3, m2, q0032
|
||||
pshufw m1, m0, q0032
|
||||
%endif
|
||||
%if notcpuflag(xop)
|
||||
PSHUFLW m3, m2, q0032
|
||||
PSHUFLW m1, m0, q0032
|
||||
paddd m2, m3
|
||||
paddd m0, m1
|
||||
%endif
|
||||
psubd m2, m0
|
||||
; This is probably a really bad idea on atom and other machines with a
|
||||
; long transfer latency between GPRs and XMMs (atom). However, it does
|
||||
@ -591,4 +591,10 @@ RESAMPLE_FNS int16, 2, 1
|
||||
|
||||
INIT_XMM sse2
|
||||
RESAMPLE_FNS int16, 2, 1
|
||||
%if HAVE_XOP_EXTERNAL
|
||||
INIT_XMM xop
|
||||
RESAMPLE_FNS int16, 2, 1
|
||||
%endif
|
||||
|
||||
INIT_XMM sse2
|
||||
RESAMPLE_FNS double, 8, 3, d, pdbl_1
|
||||
|
@ -35,6 +35,7 @@ int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \
|
||||
|
||||
RESAMPLE_FUNCS(int16, mmxext);
|
||||
RESAMPLE_FUNCS(int16, sse2);
|
||||
RESAMPLE_FUNCS(int16, xop);
|
||||
RESAMPLE_FUNCS(float, sse);
|
||||
RESAMPLE_FUNCS(float, avx);
|
||||
RESAMPLE_FUNCS(float, fma3);
|
||||
@ -73,4 +74,8 @@ void swresample_dsp_x86_init(ResampleContext *c)
|
||||
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
|
||||
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
|
||||
}
|
||||
if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) {
|
||||
c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_xop;
|
||||
c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_xop;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user