mirror of https://git.ffmpeg.org/ffmpeg.git
x86/vf_blend: optimize difference and negation functions
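Process more pixels per loop iteration: each iteration now loads and stores a full register (mmsize bytes) instead of half of one (mmsize / 2 bytes), widening both the low and the high halves.

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>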
This commit is contained in:
parent
fa50d9360b
commit
0daa1cf073
|
@ -268,21 +268,25 @@ BLEND_INIT phoenix, 4
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
%macro BLEND_ABS 0
|
%macro BLEND_ABS 0
|
||||||
BLEND_INIT difference, 3
|
BLEND_INIT difference, 5
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov xq, widthq
|
mov xq, widthq
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + xq]
|
movu m0, [topq + xq]
|
||||||
movh m1, [bottomq + xq]
|
movu m1, [bottomq + xq]
|
||||||
|
punpckhbw m3, m0, m2
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
|
punpckhbw m4, m1, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
psubw m0, m1
|
psubw m0, m1
|
||||||
|
psubw m3, m4
|
||||||
ABS1 m0, m1
|
ABS1 m0, m1
|
||||||
packuswb m0, m0
|
ABS1 m3, m4
|
||||||
movh [dstq + xq], m0
|
packuswb m0, m3
|
||||||
add xq, mmsize / 2
|
mova [dstq + xq], m0
|
||||||
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
|
@ -311,26 +315,30 @@ BLEND_INIT extremity, 8
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
|
|
||||||
BLEND_INIT negation, 5
|
BLEND_INIT negation, 8
|
||||||
pxor m2, m2
|
pxor m2, m2
|
||||||
mova m4, [pw_255]
|
mova m4, [pw_255]
|
||||||
.nextrow:
|
.nextrow:
|
||||||
mov xq, widthq
|
mov xq, widthq
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
movh m0, [topq + xq]
|
movu m0, [topq + xq]
|
||||||
movh m1, [bottomq + xq]
|
movu m1, [bottomq + xq]
|
||||||
|
punpckhbw m5, m0, m2
|
||||||
punpcklbw m0, m2
|
punpcklbw m0, m2
|
||||||
|
punpckhbw m6, m1, m2
|
||||||
punpcklbw m1, m2
|
punpcklbw m1, m2
|
||||||
mova m3, m4
|
psubw m3, m4, m0
|
||||||
psubw m3, m0
|
psubw m7, m4, m5
|
||||||
psubw m3, m1
|
psubw m3, m1
|
||||||
|
psubw m7, m6
|
||||||
ABS1 m3, m1
|
ABS1 m3, m1
|
||||||
mova m0, m4
|
ABS1 m7, m1
|
||||||
psubw m0, m3
|
psubw m0, m4, m3
|
||||||
packuswb m0, m0
|
psubw m1, m4, m7
|
||||||
movh [dstq + xq], m0
|
packuswb m0, m1
|
||||||
add xq, mmsize / 2
|
mova [dstq + xq], m0
|
||||||
|
add xq, mmsize
|
||||||
jl .loop
|
jl .loop
|
||||||
BLEND_END
|
BLEND_END
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
Loading…
Reference in New Issue