avfilter/x86/vf_blend.asm: add hardmix and phoenix sse2 SIMD

Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
Paul B Mahol 2015-10-07 10:12:26 +02:00
parent 8a9fa46e87
commit 0948ba3204
2 changed files with 78 additions and 0 deletions

View File

@ -27,6 +27,8 @@ SECTION_RODATA
pw_128: times 8 dw 128 pw_128: times 8 dw 128
pw_255: times 8 dw 255 pw_255: times 8 dw 255
; 0x80 in every byte. blend_hardmix XORs both operands with this to flip the
; sign bit, so that the signed pcmpgtb compare orders the bytes as unsigned.
pb_128: times 16 db 128
; 0xFF in every byte. XORing with it yields 255 - v (bitwise NOT) in
; blend_hardmix; blend_phoenix uses it as the constant 255 term.
pb_255: times 16 db 255
SECTION .text SECTION .text
@ -273,6 +275,37 @@ cglobal blend_darken, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, d
jg .nextrow jg .nextrow
REP_RET REP_RET
;------------------------------------------------------------------------------
; blend_hardmix(top, top_linesize, bottom, bottom_linesize,
;               dst, dst_linesize, width, start, end)
;
; For every byte:  dst = (top < 255 - bottom) ? 0 : 255
;
; Registers: m2 = all 0xFF, m3 = all 0x80, x = byte offset within the row
; (runs from -width up to 0 against pointers pre-advanced by width).
; Rows processed: end - start (the row loop body always runs at least once).
; Each iteration handles mmsize bytes, so the last iteration of a row may
; touch bytes past width when width is not a multiple of mmsize.
; NOTE(review): the store uses mova, which presumably requires dst rows to be
; mmsize-aligned -- confirm against the caller.
;
; The loop index lives in r9: cglobal requests 10 GPRs, i.e. x86inc r0..r9.
; The previous r10q was outside that set, so the prologue never saved it.
;------------------------------------------------------------------------------
cglobal blend_hardmix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
%define x r9q
    add      topq, widthq           ; point past the row end; x counts up to 0
    add   bottomq, widthq
    add      dstq, widthq
    sub      endq, startq           ; endq = number of rows
    mova       m2, [pb_255]
    mova       m3, [pb_128]
    neg    widthq
.nextrow:
    mov         x, widthq           ; x = -width
.loop:
    movu       m0, [topq + x]
    movu       m1, [bottomq + x]
    pxor       m1, m2               ; m1 = 255 - bottom
    pxor       m0, m3               ; bias both sides by 0x80 so the signed
    pxor       m1, m3               ; compare below behaves as unsigned
    pcmpgtb    m1, m0               ; m1 = 0xFF where (255 - bottom) > top
    pxor       m1, m2               ; invert: 0 where top < 255 - bottom, else 255
    mova [dstq + x], m1
    add         x, mmsize
    jl .loop
    add      topq, top_linesizeq
    add   bottomq, bottom_linesizeq
    add      dstq, dst_linesizeq
    sub      endd, 1
    jg .nextrow
    REP_RET
cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq add topq, widthq
add bottomq, widthq add bottomq, widthq
@ -298,6 +331,37 @@ cglobal blend_lighten, 9, 10, 2, 0, top, top_linesize, bottom, bottom_linesize,
jg .nextrow jg .nextrow
REP_RET REP_RET
;------------------------------------------------------------------------------
; blend_phoenix(top, top_linesize, bottom, bottom_linesize,
;               dst, dst_linesize, width, start, end)
;
; For every byte:  dst = 255 - max(top, bottom) + min(top, bottom)
;
; Registers: m3 = all 0xFF, x = byte offset within the row (runs from -width
; up to 0 against pointers pre-advanced by width).
; Rows processed: end - start (the row loop body always runs at least once).
; Each iteration handles mmsize bytes, so the last iteration of a row may
; touch bytes past width when width is not a multiple of mmsize.
; NOTE(review): the store uses mova, which presumably requires dst rows to be
; mmsize-aligned -- confirm against the caller.
;
; The loop index lives in r9: cglobal requests 10 GPRs, i.e. x86inc r0..r9.
; The previous r10q was outside that set, so the prologue never saved it.
;------------------------------------------------------------------------------
cglobal blend_phoenix, 9, 10, 4, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
%define x r9q
    add      topq, widthq           ; point past the row end; x counts up to 0
    add   bottomq, widthq
    add      dstq, widthq
    sub      endq, startq           ; endq = number of rows
    mova       m3, [pb_255]
    neg    widthq
.nextrow:
    mov         x, widthq           ; x = -width
.loop:
    movu       m0, [topq + x]
    movu       m1, [bottomq + x]
    mova       m2, m0               ; keep a copy of top for the max
    pminub     m0, m1               ; m0 = min(top, bottom)
    pmaxub     m1, m2               ; m1 = max(top, bottom)
    mova       m2, m3
    psubusb    m2, m1               ; m2 = 255 - max
    paddusb    m2, m0               ; m2 += min; min <= max, so the sum is <= 255
    mova [dstq + x], m2
    add         x, mmsize
    jl .loop
    add      topq, top_linesizeq
    add   bottomq, bottom_linesizeq
    add      dstq, dst_linesizeq
    sub      endd, 1
    jg .nextrow
    REP_RET
INIT_XMM ssse3 INIT_XMM ssse3
cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end cglobal blend_difference, 9, 10, 3, 0, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end
add topq, widthq add topq, widthq

View File

@ -59,6 +59,12 @@ void ff_blend_difference128_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end, ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values); struct FilterParams *param, double *values);
/* Hardmix blend routine; ff_blend_init_x86() installs it as param->blend for
 * BLEND_HARDMIX. Presumably the symbol produced by "cglobal blend_hardmix" in
 * vf_blend.asm -- confirm against the build. */
void ff_blend_hardmix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize, void ff_blend_lighten_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize, const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize, uint8_t *dst, ptrdiff_t dst_linesize,
@ -71,6 +77,12 @@ void ff_blend_or_sse2(const uint8_t *top, ptrdiff_t top_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end, ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values); struct FilterParams *param, double *values);
/* Phoenix blend routine; ff_blend_init_x86() installs it as param->blend for
 * BLEND_PHOENIX. Presumably the symbol produced by "cglobal blend_phoenix" in
 * vf_blend.asm -- confirm against the build. */
void ff_blend_phoenix_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
ptrdiff_t width, ptrdiff_t start, ptrdiff_t end,
struct FilterParams *param, double *values);
void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize, void ff_blend_subtract_sse2(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize, const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize, uint8_t *dst, ptrdiff_t dst_linesize,
@ -107,8 +119,10 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break; case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break;
case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break; case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break;
case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break; case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break; case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
case BLEND_OR: param->blend = ff_blend_or_sse2; break; case BLEND_OR: param->blend = ff_blend_or_sse2; break;
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break; case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break; case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
} }