mirror of https://git.ffmpeg.org/ffmpeg.git
x86/float_dsp: use three operand form for some instructions
Fixes compilation with old yasm.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
37a503ac87
commit
bda3a9faf4
|
@@ -443,19 +443,19 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
||||||
INIT_YMM fma3
|
INIT_YMM fma3
|
||||||
cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
|
cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
|
||||||
xor offsetq, offsetq
|
xor offsetq, offsetq
|
||||||
xorps m0, m0
|
xorps m0, m0, m0
|
||||||
shl sized, 2
|
shl sized, 2
|
||||||
mov lenq, sizeq
|
mov lenq, sizeq
|
||||||
cmp lenq, 32
|
cmp lenq, 32
|
||||||
jl .l16
|
jl .l16
|
||||||
cmp lenq, 64
|
cmp lenq, 64
|
||||||
jl .l32
|
jl .l32
|
||||||
xorps m1, m1
|
xorps m1, m1, m1
|
||||||
cmp lenq, 128
|
cmp lenq, 128
|
||||||
jl .l64
|
jl .l64
|
||||||
and lenq, ~127
|
and lenq, ~127
|
||||||
xorps m2, m2
|
xorps m2, m2, m2
|
||||||
xorps m3, m3
|
xorps m3, m3, m3
|
||||||
.loop128:
|
.loop128:
|
||||||
movups m4, [v1q+offsetq]
|
movups m4, [v1q+offsetq]
|
||||||
movups m5, [v1q+offsetq + 32]
|
movups m5, [v1q+offsetq + 32]
|
||||||
|
@@ -468,13 +468,13 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
|
||||||
add offsetq, 128
|
add offsetq, 128
|
||||||
cmp offsetq, lenq
|
cmp offsetq, lenq
|
||||||
jl .loop128
|
jl .loop128
|
||||||
addps m0, m2
|
addps m0, m0, m2
|
||||||
addps m1, m3
|
addps m1, m1, m3
|
||||||
mov lenq, sizeq
|
mov lenq, sizeq
|
||||||
and lenq, 127
|
and lenq, 127
|
||||||
cmp lenq, 64
|
cmp lenq, 64
|
||||||
jge .l64
|
jge .l64
|
||||||
addps m0, m1
|
addps m0, m0, m1
|
||||||
cmp lenq, 32
|
cmp lenq, 32
|
||||||
jge .l32
|
jge .l32
|
||||||
vextractf128 xmm2, m0, 1
|
vextractf128 xmm2, m0, 1
|
||||||
|
@@ -502,7 +502,7 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
|
||||||
add offsetq, 64
|
add offsetq, 64
|
||||||
cmp offsetq, lenq
|
cmp offsetq, lenq
|
||||||
jl .loop64
|
jl .loop64
|
||||||
addps m0, m1
|
addps m0, m0, m1
|
||||||
mov lenq, sizeq
|
mov lenq, sizeq
|
||||||
and lenq, 63
|
and lenq, 63
|
||||||
cmp lenq, 32
|
cmp lenq, 32
|
||||||
|
|
Loading…
Reference in New Issue