From a678d667816a86b7e5f8c4b558f8ed333bb98841 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sun, 14 Feb 2016 02:15:18 +0000 Subject: [PATCH] vf_blend: Use integers for divide mode 2.5x faster for 8-bit mode without autovectorization in GCC, 2x slower with it on x86. However, since the platforms we enable GCC autovectorization on most probably have support for SSE2 optimization (added in the subsequent commit), this commit should in general be beneficial. --- libavfilter/vf_blend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c index 4b4d4350c4..61aa17ebeb 100644 --- a/libavfilter/vf_blend.c +++ b/libavfilter/vf_blend.c @@ -247,7 +247,7 @@ DEFINE_BLEND8(hardlight, (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A)) DEFINE_BLEND8(hardmix, (A < (255 - B)) ? 0: 255) DEFINE_BLEND8(darken, FFMIN(A, B)) DEFINE_BLEND8(lighten, FFMAX(A, B)) -DEFINE_BLEND8(divide, av_clip_uint8(((float)A / ((float)B) * 255))) +DEFINE_BLEND8(divide, av_clip_uint8(B == 0 ? 255 : 255 * A / B)) DEFINE_BLEND8(dodge, DODGE(A, B)) DEFINE_BLEND8(burn, BURN(A, B)) DEFINE_BLEND8(softlight, (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255)) @@ -287,7 +287,7 @@ DEFINE_BLEND16(hardlight, (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A)) DEFINE_BLEND16(hardmix, (A < (65535 - B)) ? 0: 65535) DEFINE_BLEND16(darken, FFMIN(A, B)) DEFINE_BLEND16(lighten, FFMAX(A, B)) -DEFINE_BLEND16(divide, av_clip_uint16(((float)A / ((float)B) * 65535))) +DEFINE_BLEND16(divide, av_clip_uint16(B == 0 ? 65535 : 65535 * A / B)) DEFINE_BLEND16(dodge, DODGE(A, B)) DEFINE_BLEND16(burn, BURN(A, B)) DEFINE_BLEND16(softlight, (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))