x86/vf_gblur: fix postscale_slice prologue

The x86_32 ABI does not pass float arguments directly in xmm registers, and the
Win64 ABI uses only the first four xmm registers for this purpose.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2021-02-17 10:46:36 -03:00
parent 670051b524
commit 2b4da1cb8c
2 changed files with 14 additions and 18 deletions

View File

@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
{
s->horiz_slice = horiz_slice_c;
s->postscale_slice = postscale_c;
if (ARCH_X86_64)
ff_gblur_init_x86(s);
ff_gblur_init_x86(s);
}
static int config_input(AVFilterLink *inlink)

View File

@ -185,27 +185,24 @@ HORIZ_SLICE
%endif
%macro POSTSCALE_SLICE 0
%if UNIX64
cglobal postscale_slice, 2, 2, 4, ptr, length
%else
cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
%endif
cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
shl lengthd, 2
add ptrq, lengthq
neg lengthq
%if WIN64
%if ARCH_X86_32
VBROADCASTSS m0, postscalem
VBROADCASTSS m1, minm
VBROADCASTSS m2, maxm
%elif WIN64
SWAP 0, 2
SWAP 1, 3
SWAP 2, 4
%endif
%if cpuflag(avx2)
vbroadcastss m0, xm0
vbroadcastss m1, xm1
vbroadcastss m2, xm2
%else
shufps xm0, xm0, 0
shufps xm1, xm1, 0
shufps xm2, xm2, 0
VBROADCASTSS m0, xm0
VBROADCASTSS m1, xm1
VBROADCASTSS m2, maxm
%else ; UNIX64
VBROADCASTSS m0, xm0
VBROADCASTSS m1, xm1
VBROADCASTSS m2, xm3
%endif
.loop: