From 02f428051a41fd18f6218adc510ef42153a0ccff Mon Sep 17 00:00:00 2001 From: James Almer Date: Wed, 23 Dec 2015 23:54:33 -0300 Subject: [PATCH] x86/vf_blend: make all functions work on x86_32 Reviewed-by: Paul B Mahol Signed-off-by: James Almer --- libavfilter/x86/vf_blend.asm | 103 ++++++++++++++++---------------- libavfilter/x86/vf_blend_init.c | 4 +- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm index d079b79b12..5f0271ae07 100644 --- a/libavfilter/x86/vf_blend.asm +++ b/libavfilter/x86/vf_blend.asm @@ -22,7 +22,6 @@ %include "libavutil/x86/x86util.asm" -%if ARCH_X86_64 SECTION_RODATA pw_128: times 8 dw 128 @@ -34,11 +33,19 @@ pb_255: times 16 db 255 SECTION .text %macro BLEND_INIT 2 -cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end +%if ARCH_X86_64 +cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x + mov widthd, dword widthm +%else +cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x +%define dst_linesizeq r5mp +%define widthq r6mp +%endif + mov endd, dword r8m add topq, widthq add bottomq, widthq add dstq, widthq - sub endq, startq + sub endd, dword r7m ; start neg widthq %endmacro @@ -54,15 +61,14 @@ REP_RET %macro BLEND_SIMPLE 2 BLEND_INIT %1, 2 .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movu m0, [topq + x] - movu m1, [bottomq + x] + movu m0, [topq + xq] + movu m1, [bottomq + xq] p%2 m0, m1 - mova [dstq + x], m0 - add r10q, mmsize + mova [dstq + xq], m0 + add xq, mmsize jl .loop BLEND_END %endmacro @@ -80,38 +86,36 @@ BLEND_INIT difference128, 4 pxor m2, m2 mova m3, [pw_128] .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movh m0, [topq + x] - movh m1, [bottomq + x] + movh m0, [topq + xq] + movh m1, [bottomq + xq] punpcklbw m0, m2 punpcklbw m1, m2 paddw m0, m3 psubw m0, m1 packuswb m0, m0 - movh [dstq + x], m0 - add r10q, mmsize / 2 + movh [dstq + xq], m0 + add xq, mmsize / 2 jl .loop BLEND_END BLEND_INIT average, 3 pxor m2, m2 .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movh m0, [topq + x] - movh m1, [bottomq + x] + movh m0, [topq + xq] + movh m1, [bottomq + xq] punpcklbw m0, m2 punpcklbw m1, m2 paddw m0, m1 psrlw m0, 1 packuswb m0, m0 - movh [dstq + x], m0 - add r10q, mmsize / 2 + movh [dstq + xq], m0 + add xq, mmsize / 2 jl .loop BLEND_END @@ -119,19 +123,18 @@ BLEND_INIT addition128, 4 pxor m2, m2 mova m3, [pw_128] .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movh m0, [topq + x] - movh m1, [bottomq + x] + movh m0, [topq + xq] + movh m1, [bottomq + xq] punpcklbw m0, m2 punpcklbw m1, m2 paddw m0, m1 psubw m0, m3 packuswb m0, m0 - movh [dstq + x], m0 - add r10q, mmsize / 2 + movh [dstq + xq], m0 + add xq, mmsize / 2 jl .loop BLEND_END @@ -140,38 +143,36 @@ BLEND_INIT hardmix, 5 mova m3, [pb_128] mova m4, [pb_127] .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movu m0, [topq + x] - movu m1, [bottomq + x] + movu m0, [topq + xq] + movu m1, [bottomq + xq] pxor m1, m4 pxor m0, m3 pcmpgtb m1, m0 pxor m1, m2 - mova [dstq + x], m1 - add r10q, mmsize + mova [dstq + xq], m1 + add xq, mmsize jl .loop BLEND_END BLEND_INIT phoenix, 4 mova m3, [pb_255] .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movu m0, [topq + x] - movu m1, [bottomq + x] + movu m0, [topq + xq] + movu m1, [bottomq + xq] mova m2, m0 pminub m0, m1 pmaxub m1, m2 mova m2, m3 psubusb m2, m1 paddusb m2, m0 - mova [dstq + x], m2 - add r10q, mmsize + mova [dstq + xq], m2 + add xq, mmsize jl .loop BLEND_END @@ -179,19 +180,18 @@ INIT_XMM ssse3 BLEND_INIT difference, 3 pxor m2, m2 .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movh m0, [topq + x] - movh m1, [bottomq + x] + movh m0, [topq + xq] + movh m1, [bottomq + xq] punpcklbw m0, m2 punpcklbw m1, m2 psubw m0, m1 pabsw m0, m0 packuswb m0, m0 - movh [dstq + x], m0 - add r10q, mmsize / 2 + movh [dstq + xq], m0 + add xq, mmsize / 2 jl .loop BLEND_END @@ -199,12 +199,11 @@ BLEND_INIT negation, 5 pxor m2, m2 mova m4, [pw_255] .nextrow: - mov r10q, widthq - %define x r10q + mov xq, widthq .loop: - movh m0, [topq + x] - movh m1, [bottomq + x] + movh m0, [topq + xq] + movh m1, [bottomq + xq] punpcklbw m0, m2 punpcklbw m1, m2 mova m3, m4 @@ -214,9 +213,7 @@ BLEND_INIT negation, 5 mova m0, m4 psubw m0, m3 packuswb m0, m0 - movh [dstq + x], m0 - add r10q, mmsize / 2 + movh [dstq + xq], m0 + add xq, mmsize / 2 jl .loop BLEND_END - -%endif diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c index 82b88487d8..b7d234f070 100644 --- a/libavfilter/x86/vf_blend_init.c +++ b/libavfilter/x86/vf_blend_init.c @@ -49,7 +49,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit) { int cpu_flags = av_get_cpu_flags(); - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) { + if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) { switch (param->mode) { case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break; case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break; @@ -65,7 +65,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit) case BLEND_XOR: param->blend = ff_blend_xor_sse2; break; } } - if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) { + if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) { switch (param->mode) { case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break; case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;