From 0948ba320496d02ad185487c18b249610de1a184 Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Wed, 7 Oct 2015 10:12:26 +0200
Subject: [PATCH] avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD

Signed-off-by: Paul B Mahol
---
 libavfilter/x86/vf_blend.asm    | 64 +++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_blend_init.c | 14 ++++++++
 2 files changed, 78 insertions(+)

diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
index 167e72b22d..54b5430a90 100644
--- a/libavfilter/x86/vf_blend.asm
+++ b/libavfilter/x86/vf_blend.asm
@@ -27,6 +27,8 @@ SECTION_RODATA
 
 pw_128: times 8 dw 128
 pw_255: times 8 dw 255
+pb_128: times 16 db 128 ; 16 bytes of 128; XOR with this flips the top bit of every byte
+pb_255: times 16 db 255 ; 16 bytes of 255; XOR with this is a bytewise NOT, i.e. 255 - v
 
 SECTION .text
 
@@ -273,6 +275,37 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
     jg .nextrow
 REP_RET
 
+cglobal blend_hardmix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+    add      topq, widthq ; advance each row pointer by width so x can run from -width up to 0
+    add   bottomq, widthq
+    add      dstq, widthq
+    sub      endq, startq ; end = end - start; counted down once per row below
+    mova       m2, [pb_255] ; m2 = all-255 bytes (loop invariant)
+    mova       m3, [pb_128] ; m3 = all-128 bytes (loop invariant)
+    neg    widthq ; widthq = -width, the starting value of x for every row
+.nextrow:
+    mov       r10q, widthq ; x = -width; NOTE(review): r10 exists only on x86-64 - confirm the enclosing arch guard
+    %define      x  r10q
+
+    .loop:
+        movu            m0, [topq + x] ; m0 = top bytes (unaligned load)
+        movu            m1, [bottomq + x] ; m1 = bottom bytes (unaligned load)
+        pxor            m1, m2 ; m1 = 255 - bottom
+        pxor            m0, m3 ; bias top by 128 so the signed compare below orders bytes as unsigned
+        pxor            m1, m3 ; same bias for 255 - bottom
+        pcmpgtb         m1, m0 ; signed byte compare: m1 = 0xFF where (255 - bottom) > top, else 0
+        pxor            m1, m2 ; invert the mask: 0 where top < 255 - bottom, 255 otherwise
+        mova    [dstq + x], m1 ; store the result bytes; NOTE(review): mova presumably requires dst + x aligned - confirm
+        add           r10q, mmsize ; x += one vector of bytes
+    jl .loop ; repeat while x < 0; the last vector overshoots the row when width is not a multiple of mmsize - NOTE(review): confirm buffers are padded
+
+    add          topq, top_linesizeq ; step all three pointers to the next row
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1 ; one row done
+    jg .nextrow ; continue while rows remain
+REP_RET
+
 cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
     add   bottomq, widthq
@@ -298,6 +331,37 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
     jg .nextrow
 REP_RET
 
+cglobal blend_phoenix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
+    add      topq, widthq ; advance each row pointer by width so x can run from -width up to 0
+    add   bottomq, widthq
+    add      dstq, widthq
+    sub      endq, startq ; end = end - start; counted down once per row below
+    mova       m3, [pb_255] ; m3 = all-255 bytes (loop invariant)
+    neg    widthq ; widthq = -width, the starting value of x for every row
+.nextrow:
+    mov       r10q, widthq ; x = -width; NOTE(review): r10 exists only on x86-64 - confirm the enclosing arch guard
+    %define      x  r10q
+
+    .loop:
+        movu            m0, [topq + x] ; m0 = top bytes (unaligned load)
+        movu            m1, [bottomq + x] ; m1 = bottom bytes (unaligned load)
+        mova            m2, m0 ; keep a copy of top, because pminub overwrites m0
+        pminub          m0, m1 ; m0 = min(top, bottom), unsigned bytes
+        pmaxub          m1, m2 ; m1 = max(top, bottom), unsigned bytes
+        mova            m2, m3 ; m2 = 255
+        psubusb         m2, m1 ; m2 = 255 - max
+        paddusb         m2, m0 ; m2 = 255 - max + min; cannot exceed 255 because min <= max
+        mova    [dstq + x], m2 ; store the result bytes; NOTE(review): mova presumably requires dst + x aligned - confirm
+        add           r10q, mmsize ; x += one vector of bytes
+    jl .loop ; repeat while x < 0; the last vector overshoots the row when width is not a multiple of mmsize - NOTE(review): confirm buffers are padded
+
+    add          topq, top_linesizeq ; step all three pointers to the next row
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1 ; one row done
+    jg .nextrow ; continue while rows remain
+REP_RET
+
 INIT_XMM ssse3
 cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
     add      topq, widthq
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index 61e90f8d37..454d03030d 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                                  ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                                  struct FilterParams *param, double *values);
 
+void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize, /* presumably the symbol produced by cglobal blend_hardmix in vf_blend.asm - confirm */
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                            const uint8_t *bottom, ptrdiff_t bottom_linesize,
                            uint8_t *dst, ptrdiff_t dst_linesize,
@@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                       ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
                       struct FilterParams *param, double *values);
 
+void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize, /* presumably the symbol produced by cglobal blend_phoenix in vf_blend.asm - confirm */
+                           const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                           uint8_t *dst, ptrdiff_t dst_linesize,
+                           ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
+                           struct FilterParams *param, double *values);
+
 void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
                             const uint8_t *bottom, ptrdiff_t bottom_linesize,
                             uint8_t *dst, ptrdiff_t dst_linesize,
@@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
         case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break;
         case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break;
         case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+        case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break; /* hardmix mode now uses the SSE2 routine */
         case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
         case BLEND_OR: param->blend = ff_blend_or_sse2; break;
+        case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break; /* phoenix mode now uses the SSE2 routine */
         case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
         case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
         }