x86/vf_blend: make all functions work on x86_32

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2015-12-23 23:54:33 -03:00
parent 0988c68cf9
commit 02f428051a
2 changed files with 52 additions and 55 deletions

View File

@@ -22,7 +22,6 @@
%include "libavutil/x86/x86util.asm" %include "libavutil/x86/x86util.asm"
%if ARCH_X86_64
SECTION_RODATA SECTION_RODATA
pw_128: times 8 dw 128 pw_128: times 8 dw 128
@@ -34,11 +33,19 @@ pb_255: times 16 db 255
SECTION .text SECTION .text
%macro BLEND_INIT 2 %macro BLEND_INIT 2
cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end %if ARCH_X86_64
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
mov widthd, dword widthm
%else
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
%define dst_linesizeq r5mp
%define widthq r6mp
%endif
mov endd, dword r8m
add topq, widthq add topq, widthq
add bottomq, widthq add bottomq, widthq
add dstq, widthq add dstq, widthq
sub endq, startq sub endd, dword r7m ; start
neg widthq neg widthq
%endmacro %endmacro
@@ -54,15 +61,14 @@ REP_RET
%macro BLEND_SIMPLE 2 %macro BLEND_SIMPLE 2
BLEND_INIT %1, 2 BLEND_INIT %1, 2
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movu m0, [topq + x] movu m0, [topq + xq]
movu m1, [bottomq + x] movu m1, [bottomq + xq]
p%2 m0, m1 p%2 m0, m1
mova [dstq + x], m0 mova [dstq + xq], m0
add r10q, mmsize add xq, mmsize
jl .loop jl .loop
BLEND_END BLEND_END
%endmacro %endmacro
@@ -80,38 +86,36 @@ BLEND_INIT difference128, 4
pxor m2, m2 pxor m2, m2
mova m3, [pw_128] mova m3, [pw_128]
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movh m0, [topq + x] movh m0, [topq + xq]
movh m1, [bottomq + x] movh m1, [bottomq + xq]
punpcklbw m0, m2 punpcklbw m0, m2
punpcklbw m1, m2 punpcklbw m1, m2
paddw m0, m3 paddw m0, m3
psubw m0, m1 psubw m0, m1
packuswb m0, m0 packuswb m0, m0
movh [dstq + x], m0 movh [dstq + xq], m0
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
BLEND_END BLEND_END
BLEND_INIT average, 3 BLEND_INIT average, 3
pxor m2, m2 pxor m2, m2
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movh m0, [topq + x] movh m0, [topq + xq]
movh m1, [bottomq + x] movh m1, [bottomq + xq]
punpcklbw m0, m2 punpcklbw m0, m2
punpcklbw m1, m2 punpcklbw m1, m2
paddw m0, m1 paddw m0, m1
psrlw m0, 1 psrlw m0, 1
packuswb m0, m0 packuswb m0, m0
movh [dstq + x], m0 movh [dstq + xq], m0
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
BLEND_END BLEND_END
@@ -119,19 +123,18 @@ BLEND_INIT addition128, 4
pxor m2, m2 pxor m2, m2
mova m3, [pw_128] mova m3, [pw_128]
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movh m0, [topq + x] movh m0, [topq + xq]
movh m1, [bottomq + x] movh m1, [bottomq + xq]
punpcklbw m0, m2 punpcklbw m0, m2
punpcklbw m1, m2 punpcklbw m1, m2
paddw m0, m1 paddw m0, m1
psubw m0, m3 psubw m0, m3
packuswb m0, m0 packuswb m0, m0
movh [dstq + x], m0 movh [dstq + xq], m0
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
BLEND_END BLEND_END
@@ -140,38 +143,36 @@ BLEND_INIT hardmix, 5
mova m3, [pb_128] mova m3, [pb_128]
mova m4, [pb_127] mova m4, [pb_127]
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movu m0, [topq + x] movu m0, [topq + xq]
movu m1, [bottomq + x] movu m1, [bottomq + xq]
pxor m1, m4 pxor m1, m4
pxor m0, m3 pxor m0, m3
pcmpgtb m1, m0 pcmpgtb m1, m0
pxor m1, m2 pxor m1, m2
mova [dstq + x], m1 mova [dstq + xq], m1
add r10q, mmsize add xq, mmsize
jl .loop jl .loop
BLEND_END BLEND_END
BLEND_INIT phoenix, 4 BLEND_INIT phoenix, 4
mova m3, [pb_255] mova m3, [pb_255]
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movu m0, [topq + x] movu m0, [topq + xq]
movu m1, [bottomq + x] movu m1, [bottomq + xq]
mova m2, m0 mova m2, m0
pminub m0, m1 pminub m0, m1
pmaxub m1, m2 pmaxub m1, m2
mova m2, m3 mova m2, m3
psubusb m2, m1 psubusb m2, m1
paddusb m2, m0 paddusb m2, m0
mova [dstq + x], m2 mova [dstq + xq], m2
add r10q, mmsize add xq, mmsize
jl .loop jl .loop
BLEND_END BLEND_END
@@ -179,19 +180,18 @@ INIT_XMM ssse3
BLEND_INIT difference, 3 BLEND_INIT difference, 3
pxor m2, m2 pxor m2, m2
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movh m0, [topq + x] movh m0, [topq + xq]
movh m1, [bottomq + x] movh m1, [bottomq + xq]
punpcklbw m0, m2 punpcklbw m0, m2
punpcklbw m1, m2 punpcklbw m1, m2
psubw m0, m1 psubw m0, m1
pabsw m0, m0 pabsw m0, m0
packuswb m0, m0 packuswb m0, m0
movh [dstq + x], m0 movh [dstq + xq], m0
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
BLEND_END BLEND_END
@@ -199,12 +199,11 @@ BLEND_INIT negation, 5
pxor m2, m2 pxor m2, m2
mova m4, [pw_255] mova m4, [pw_255]
.nextrow: .nextrow:
mov r10q, widthq mov xq, widthq
%define x r10q
.loop: .loop:
movh m0, [topq + x] movh m0, [topq + xq]
movh m1, [bottomq + x] movh m1, [bottomq + xq]
punpcklbw m0, m2 punpcklbw m0, m2
punpcklbw m1, m2 punpcklbw m1, m2
mova m3, m4 mova m3, m4
@@ -214,9 +213,7 @@ BLEND_INIT negation, 5
mova m0, m4 mova m0, m4
psubw m0, m3 psubw m0, m3
packuswb m0, m0 packuswb m0, m0
movh [dstq + x], m0 movh [dstq + xq], m0
add r10q, mmsize / 2 add xq, mmsize / 2
jl .loop jl .loop
BLEND_END BLEND_END
%endif

View File

@@ -49,7 +49,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
{ {
int cpu_flags = av_get_cpu_flags(); int cpu_flags = av_get_cpu_flags();
if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) { if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1 && !is_16bit) {
switch (param->mode) { switch (param->mode) {
case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break; case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break; case BLEND_ADDITION128: param->blend = ff_blend_addition128_sse2; break;
@@ -65,7 +65,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
case BLEND_XOR: param->blend = ff_blend_xor_sse2; break; case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
} }
} }
if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) { if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1 && !is_16bit) {
switch (param->mode) { switch (param->mode) {
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break; case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break; case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;