diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm index 06b269828a..a6c65b805d 100644 --- a/libavfilter/x86/vf_interlace.asm +++ b/libavfilter/x86/vf_interlace.asm @@ -39,6 +39,20 @@ SECTION .text pcmpeq%1 m6, m6 + test hq, mmsize + je .loop + + ;process 1 * mmsize + movu m0, [mrefq+hq] + pavg%1 m0, [prefq+hq] + pxor m0, m6 + pxor m2, m6, [srcq+hq] + pavg%1 m0, m2 + pxor m0, m6 + mova [dstq+hq], m0 + add hq, mmsize + jge .end + .loop: movu m0, [mrefq+hq] movu m1, [mrefq+hq+mmsize] @@ -57,7 +71,9 @@ SECTION .text add hq, 2*mmsize jl .loop -REP_RET + +.end: + REP_RET %endmacro %macro LOWPASS_LINE 0 @@ -201,5 +217,10 @@ LOWPASS_LINE INIT_XMM avx LOWPASS_LINE +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +LOWPASS_LINE +%endif + INIT_XMM sse2 LOWPASS_LINE_COMPLEX diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c index b024b61735..0de0fea382 100644 --- a/libavfilter/x86/vf_interlace_init.c +++ b/libavfilter/x86/vf_interlace_init.c @@ -32,6 +32,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, ptrdiff_t pref, int clip_max); +void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize, + const uint8_t *srcp, ptrdiff_t mref, + ptrdiff_t pref, int clip_max); void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, @@ -39,6 +42,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, ptrdiff_t pref, int clip_max); +void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize, + const uint8_t *srcp, ptrdiff_t mref, + ptrdiff_t pref, int clip_max); void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, @@ -62,6 +68,9 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth) if (EXTERNAL_AVX(cpu_flags)) if (s->lowpass == VLPF_LIN) s->lowpass_line = ff_lowpass_line_16_avx; + if (EXTERNAL_AVX2_FAST(cpu_flags)) + if (s->lowpass == VLPF_LIN) + s->lowpass_line = ff_lowpass_line_16_avx2; } else { if (EXTERNAL_SSE2(cpu_flags)) { if (s->lowpass == VLPF_LIN) @@ -72,5 +81,8 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth) if (EXTERNAL_AVX(cpu_flags)) if (s->lowpass == VLPF_LIN) s->lowpass_line = ff_lowpass_line_avx; + if (EXTERNAL_AVX2_FAST(cpu_flags)) + if (s->lowpass == VLPF_LIN) + s->lowpass_line = ff_lowpass_line_avx2; } } diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c index 209812964d..2c9b1de581 100644 --- a/libavfilter/x86/vf_tinterlace_init.c +++ b/libavfilter/x86/vf_tinterlace_init.c @@ -33,6 +33,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, ptrdiff_t pref, int clip_max); +void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize, + const uint8_t *srcp, ptrdiff_t mref, + ptrdiff_t pref, int clip_max); void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, @@ -40,6 +43,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize, void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, ptrdiff_t pref, int clip_max); +void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize, + const uint8_t *srcp, ptrdiff_t mref, + ptrdiff_t pref, int clip_max); void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp, ptrdiff_t mref, @@ -63,6 +69,11 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) if (EXTERNAL_AVX(cpu_flags)) if (!(s->flags & TINTERLACE_FLAG_CVLPF)) s->lowpass_line = ff_lowpass_line_16_avx; + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + if (!(s->flags & TINTERLACE_FLAG_CVLPF)) { + s->lowpass_line = ff_lowpass_line_16_avx2; + } + } } else { if (EXTERNAL_SSE2(cpu_flags)) { if (!(s->flags & TINTERLACE_FLAG_CVLPF)) @@ -73,5 +84,10 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s) if (EXTERNAL_AVX(cpu_flags)) if (!(s->flags & TINTERLACE_FLAG_CVLPF)) s->lowpass_line = ff_lowpass_line_avx; + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + if (!(s->flags & TINTERLACE_FLAG_CVLPF)) { + s->lowpass_line = ff_lowpass_line_avx2; + } + } } }