mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-02-27 17:21:05 +00:00
avfilter/x86/vf_blend: add 16-bit versions of BLEND_SIMPLE, phoenix and difference for SSE and AVX2 (x86_64 only)
This commit is contained in:
parent
6c6c9d14a8
commit
53a03b5c8c
@ -36,10 +36,13 @@ pb_255: times 16 db 255
|
|||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%macro BLEND_INIT 2
|
%macro BLEND_INIT 2-3
|
||||||
%if ARCH_X86_64
|
%if ARCH_X86_64
|
||||||
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
|
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
|
||||||
mov widthd, dword widthm
|
mov widthd, dword widthm
|
||||||
|
%if %0 == 3; is 16 bit
|
||||||
|
add widthq, widthq ; doesn't compile on x86_32
|
||||||
|
%endif
|
||||||
%else
|
%else
|
||||||
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
|
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
|
||||||
%define dst_linesizeq r5mp
|
%define dst_linesizeq r5mp
|
||||||
@ -61,8 +64,8 @@ cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end
|
|||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro BLEND_SIMPLE 2
|
%macro BLEND_SIMPLE 2-3
|
||||||
BLEND_INIT %1, 2
|
BLEND_INIT %1, 2, %3
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov xq, widthq
|
mov xq, widthq
|
||||||
|
|
||||||
@ -270,8 +273,9 @@ BLEND_INIT divide, 4
|
|||||||
BLEND_END
|
BLEND_END
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro PHOENIX 0
|
%macro PHOENIX 2-3
|
||||||
BLEND_INIT phoenix, 4
|
; %1 = function name, %2 = instruction element suffix (b or w), %3 = (optional) 1 if 16 bit
|
||||||
|
BLEND_INIT %1, 4, %3
|
||||||
VBROADCASTI128 m3, [pb_255]
|
VBROADCASTI128 m3, [pb_255]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov xq, widthq
|
mov xq, widthq
|
||||||
@ -280,19 +284,19 @@ BLEND_INIT phoenix, 4
|
|||||||
movu m0, [topq + xq]
|
movu m0, [topq + xq]
|
||||||
movu m1, [bottomq + xq]
|
movu m1, [bottomq + xq]
|
||||||
mova m2, m0
|
mova m2, m0
|
||||||
pminub m0, m1
|
pminu%2 m0, m1
|
||||||
pmaxub m1, m2
|
pmaxu%2 m1, m2
|
||||||
mova m2, m3
|
mova m2, m3
|
||||||
psubusb m2, m1
|
psubus%2 m2, m1
|
||||||
paddusb m2, m0
|
paddus%2 m2, m0
|
||||||
mova [dstq + xq], m2
|
mova [dstq + xq], m2
|
||||||
add xq, mmsize
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro BLEND_ABS 0
|
%macro DIFFERENCE 1-2
|
||||||
BLEND_INIT difference, 5
|
BLEND_INIT %1, 5, %2
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov xq, widthq
|
mov xq, widthq
|
||||||
@ -300,6 +304,17 @@ BLEND_INIT difference, 5
|
|||||||
.loop:
|
.loop:
|
||||||
movu m0, [topq + xq]
|
movu m0, [topq + xq]
|
||||||
movu m1, [bottomq + xq]
|
movu m1, [bottomq + xq]
|
||||||
|
%if %0 == 2 ; 16 bit
|
||||||
|
punpckhwd m3, m0, m2
|
||||||
|
punpcklwd m0, m2
|
||||||
|
punpckhwd m4, m1, m2
|
||||||
|
punpcklwd m1, m2
|
||||||
|
psubd m0, m1
|
||||||
|
psubd m3, m4
|
||||||
|
pabsd m0, m0
|
||||||
|
pabsd m3, m3
|
||||||
|
packusdw m0, m3
|
||||||
|
%else
|
||||||
punpckhbw m3, m0, m2
|
punpckhbw m3, m0, m2
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
punpckhbw m4, m1, m2
|
punpckhbw m4, m1, m2
|
||||||
@ -308,11 +323,14 @@ BLEND_INIT difference, 5
|
|||||||
psubw m3, m4
|
psubw m3, m4
|
||||||
ABS2 m0, m3, m1, m4
|
ABS2 m0, m3, m1, m4
|
||||||
packuswb m0, m3
|
packuswb m0, m3
|
||||||
|
%endif
|
||||||
mova [dstq + xq], m0
|
mova [dstq + xq], m0
|
||||||
add xq, mmsize
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro BLEND_ABS 0
|
||||||
BLEND_INIT extremity, 8
|
BLEND_INIT extremity, 8
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
VBROADCASTI128 m4, [pw_255]
|
VBROADCASTI128 m4, [pw_255]
|
||||||
@ -378,14 +396,32 @@ BLEND_SCREEN
|
|||||||
AVERAGE
|
AVERAGE
|
||||||
GRAINMERGE
|
GRAINMERGE
|
||||||
HARDMIX
|
HARDMIX
|
||||||
PHOENIX
|
PHOENIX phoenix, b
|
||||||
|
DIFFERENCE difference
|
||||||
DIVIDE
|
DIVIDE
|
||||||
|
|
||||||
BLEND_ABS
|
BLEND_ABS
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
BLEND_SIMPLE addition_16, addusw, 1
|
||||||
|
BLEND_SIMPLE and_16, and, 1
|
||||||
|
BLEND_SIMPLE or_16, or, 1
|
||||||
|
BLEND_SIMPLE subtract_16, subusw, 1
|
||||||
|
BLEND_SIMPLE xor_16, xor, 1
|
||||||
|
%endif
|
||||||
|
|
||||||
INIT_XMM ssse3
|
INIT_XMM ssse3
|
||||||
|
DIFFERENCE difference
|
||||||
BLEND_ABS
|
BLEND_ABS
|
||||||
|
|
||||||
|
INIT_XMM sse4
|
||||||
|
%if ARCH_X86_64
|
||||||
|
BLEND_SIMPLE darken_16, minuw, 1
|
||||||
|
BLEND_SIMPLE lighten_16, maxuw, 1
|
||||||
|
PHOENIX phoenix_16, w, 1
|
||||||
|
DIFFERENCE difference_16, 1
|
||||||
|
%endif
|
||||||
|
|
||||||
%if HAVE_AVX2_EXTERNAL
|
%if HAVE_AVX2_EXTERNAL
|
||||||
INIT_YMM avx2
|
INIT_YMM avx2
|
||||||
BLEND_SIMPLE xor, xor
|
BLEND_SIMPLE xor, xor
|
||||||
@ -401,7 +437,20 @@ BLEND_SCREEN
|
|||||||
AVERAGE
|
AVERAGE
|
||||||
GRAINMERGE
|
GRAINMERGE
|
||||||
HARDMIX
|
HARDMIX
|
||||||
PHOENIX
|
PHOENIX phoenix, b
|
||||||
|
|
||||||
|
DIFFERENCE difference
|
||||||
BLEND_ABS
|
BLEND_ABS
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
BLEND_SIMPLE addition_16, addusw, 1
|
||||||
|
BLEND_SIMPLE and_16, and, 1
|
||||||
|
BLEND_SIMPLE darken_16, minuw, 1
|
||||||
|
BLEND_SIMPLE lighten_16, maxuw, 1
|
||||||
|
BLEND_SIMPLE or_16, or, 1
|
||||||
|
BLEND_SIMPLE subtract_16, subusw, 1
|
||||||
|
BLEND_SIMPLE xor_16, xor, 1
|
||||||
|
PHOENIX phoenix_16, w, 1
|
||||||
|
DIFFERENCE difference_16, 1
|
||||||
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
@ -69,6 +69,27 @@ BLEND_FUNC(negation, sse2)
|
|||||||
BLEND_FUNC(negation, ssse3)
|
BLEND_FUNC(negation, ssse3)
|
||||||
BLEND_FUNC(negation, avx2)
|
BLEND_FUNC(negation, avx2)
|
||||||
|
|
||||||
|
#if ARCH_X86_64
|
||||||
|
BLEND_FUNC(addition_16, sse2)
|
||||||
|
BLEND_FUNC(addition_16, avx2)
|
||||||
|
BLEND_FUNC(and_16, sse2)
|
||||||
|
BLEND_FUNC(and_16, avx2)
|
||||||
|
BLEND_FUNC(darken_16, sse4)
|
||||||
|
BLEND_FUNC(darken_16, avx2)
|
||||||
|
BLEND_FUNC(difference_16, sse4)
|
||||||
|
BLEND_FUNC(difference_16, avx2)
|
||||||
|
BLEND_FUNC(lighten_16, sse4)
|
||||||
|
BLEND_FUNC(lighten_16, avx2)
|
||||||
|
BLEND_FUNC(or_16, sse2)
|
||||||
|
BLEND_FUNC(or_16, avx2)
|
||||||
|
BLEND_FUNC(phoenix_16, sse4)
|
||||||
|
BLEND_FUNC(phoenix_16, avx2)
|
||||||
|
BLEND_FUNC(subtract_16, sse2)
|
||||||
|
BLEND_FUNC(subtract_16, avx2)
|
||||||
|
BLEND_FUNC(xor_16, sse2)
|
||||||
|
BLEND_FUNC(xor_16, avx2)
|
||||||
|
#endif /* ARCH_X86_64 */
|
||||||
|
|
||||||
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
@ -125,5 +146,38 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
|
|||||||
case BLEND_NEGATION: param->blend = ff_blend_negation_avx2; break;
|
case BLEND_NEGATION: param->blend = ff_blend_negation_avx2; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else { /* is_16_bit */
|
||||||
|
#if ARCH_X86_64
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
|
||||||
|
switch (param->mode) {
|
||||||
|
case BLEND_ADDITION: param->blend = ff_blend_addition_16_sse2; break;
|
||||||
|
case BLEND_AND: param->blend = ff_blend_and_16_sse2; break;
|
||||||
|
case BLEND_OR: param->blend = ff_blend_or_16_sse2; break;
|
||||||
|
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_sse2; break;
|
||||||
|
case BLEND_XOR: param->blend = ff_blend_xor_16_sse2; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (EXTERNAL_SSE4(cpu_flags) && param->opacity == 1) {
|
||||||
|
switch (param->mode) {
|
||||||
|
case BLEND_DARKEN: param->blend = ff_blend_darken_16_sse4; break;
|
||||||
|
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_sse4; break;
|
||||||
|
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_16_sse4; break;
|
||||||
|
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_16_sse4; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
|
||||||
|
switch (param->mode) {
|
||||||
|
case BLEND_ADDITION: param->blend = ff_blend_addition_16_avx2; break;
|
||||||
|
case BLEND_AND: param->blend = ff_blend_and_16_avx2; break;
|
||||||
|
case BLEND_DARKEN: param->blend = ff_blend_darken_16_avx2; break;
|
||||||
|
case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_avx2; break;
|
||||||
|
case BLEND_LIGHTEN: param->blend = ff_blend_lighten_16_avx2; break;
|
||||||
|
case BLEND_OR: param->blend = ff_blend_or_16_avx2; break;
|
||||||
|
case BLEND_PHOENIX: param->blend = ff_blend_phoenix_16_avx2; break;
|
||||||
|
case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_avx2; break;
|
||||||
|
case BLEND_XOR: param->blend = ff_blend_xor_16_avx2; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* ARCH_X86_64 */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user