mirror of https://git.ffmpeg.org/ffmpeg.git
vp9lpf/x86: add an SSE2 version of vp9_loop_filter_[vh]_88_16
Similar gains as the ssse3 version once again. Additional improvements by Clément Bœsch <u@pkh.me>. Signed-off-by: James Almer <jamrial@gmail.com> Signed-off-by: Anton Khirnov <anton@khirnov.net>
This commit is contained in:
parent
6bea478158
commit
92d47550ea
|
@ -226,6 +226,7 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
|
|||
lpf_funcs(16, 16, sse2);
|
||||
lpf_funcs(16, 16, ssse3);
|
||||
lpf_funcs(16, 16, avx);
|
||||
lpf_funcs(88, 16, sse2);
|
||||
lpf_funcs(88, 16, ssse3);
|
||||
lpf_funcs(88, 16, avx);
|
||||
|
||||
|
@ -293,6 +294,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
|
|||
init_fpel(1, 1, 32, avg, sse2);
|
||||
init_fpel(0, 1, 64, avg, sse2);
|
||||
if (ARCH_X86_64) {
|
||||
dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_sse2;
|
||||
dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_sse2;
|
||||
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
|
||||
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
|
||||
}
|
||||
|
|
|
@ -292,6 +292,17 @@ SECTION .text
|
|||
%define Q7 dst2q + strideq
|
||||
%endmacro
|
||||
|
||||
; ..............AB -> AAAAAAAABBBBBBBB
; SPLATB_MIX: in-place broadcast of the two lowest bytes of a vector register,
; so that the lowest byte (B) fills the low 8 bytes and the next byte (A) fills
; the high 8 bytes of the 16-byte register.
;   %1 = register to splat; it is both the source and the destination
;   %2 = shuffle-control operand for pshufb, used only when cpuflag(ssse3) is
;        set; defaults to [mask_mix] when the caller omits it
; NOTE(review): mask_mix is defined outside this view; it presumably holds the
; byte indices that produce the layout shown above -- confirm.
|
||||
%macro SPLATB_MIX 1-2 [mask_mix]
|
||||
%if cpuflag(ssse3)
|
||||
; single byte shuffle driven by the control mask in %2
pshufb %1, %2
|
||||
%else
|
||||
; no pshufb available: widen by self-interleaving the low half three times,
; doubling the run length of each source byte at every step
punpcklbw %1, %1 ; low bytes:  ..., A, B       -> ..., AA, BB
|
||||
punpcklwd %1, %1 ; low words:  ..., AA, BB     -> ..., AAAA, BBBB
|
||||
punpckldq %1, %1 ; low dwords: ..., AAAA, BBBB -> AAAAAAAA, BBBBBBBB
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro LOOPFILTER 2 ; %1=v/h %2=size1
|
||||
lea mstrideq, [strideq]
|
||||
neg mstrideq
|
||||
|
@ -382,11 +393,13 @@ SECTION .text
|
|||
SPLATB_REG m2, I, m0 ; I I I I ...
|
||||
SPLATB_REG m3, E, m0 ; E E E E ...
|
||||
%elif %2 == 88
|
||||
%if cpuflag(ssse3)
|
||||
mova m0, [mask_mix]
|
||||
%endif
|
||||
movd m2, Id
|
||||
movd m3, Ed
|
||||
pshufb m2, m0
|
||||
pshufb m3, m0
|
||||
SPLATB_MIX m2, m0
|
||||
SPLATB_MIX m3, m0
|
||||
%endif
|
||||
mova m0, [pb_80]
|
||||
pxor m2, m0
|
||||
|
@ -446,7 +459,7 @@ SECTION .text
|
|||
SPLATB_REG m7, H, m0 ; H H H H ...
|
||||
%else
|
||||
movd m7, Hd
|
||||
pshufb m7, [mask_mix]
|
||||
SPLATB_MIX m7
|
||||
%endif
|
||||
pxor m7, m8
|
||||
pxor m4, m8
|
||||
|
@ -727,6 +740,7 @@ LPF_16_16_VH sse2
|
|||
LPF_16_16_VH ssse3
|
||||
LPF_16_16_VH avx
|
||||
|
||||
LPF_88_16_VH sse2
|
||||
LPF_88_16_VH ssse3
|
||||
LPF_88_16_VH avx
|
||||
|
||||
|
|
Loading…
Reference in New Issue