mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-03-11 06:58:18 +00:00
x86: fft: replace call to memcpy with a loop
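The memcpy call was a mess to handle: Win64, x86-64 (with and without PIC) and x86-32 each needed their own call sequence. memcpy also cannot make the assumptions the new copy loop makes, namely that both buffers can be accessed with aligned 16-byte loads and stores (movaps) and that the size is at least 32 bytes and is copied in 32-byte steps. Tested on an IMC sample: 430c -> 370c. Signed-off-by: Mans Rullgard <mans@mansr.com>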
This commit is contained in:
parent
75d339e044
commit
a5bfa66df5
@ -615,8 +615,6 @@ cglobal fft_calc, 2,5,8
|
||||
.end:
|
||||
REP_RET
|
||||
|
||||
cextern_naked memcpy
|
||||
|
||||
cglobal fft_permute, 2,7,1
|
||||
mov r4, [r0 + FFTContext.revtab]
|
||||
mov r5, [r0 + FFTContext.tmpbuf]
|
||||
@ -637,29 +635,18 @@ cglobal fft_permute, 2,7,1
|
||||
cmp r0, r2
|
||||
jl .loop
|
||||
shl r2, 3
|
||||
%if ARCH_X86_64
|
||||
mov r0, r1
|
||||
mov r1, r5
|
||||
%endif
|
||||
%if WIN64
|
||||
sub rsp, 8
|
||||
call memcpy
|
||||
add rsp, 8
|
||||
RET
|
||||
%elif ARCH_X86_64
|
||||
%ifdef PIC
|
||||
jmp memcpy wrt ..plt
|
||||
%else
|
||||
jmp memcpy
|
||||
%endif
|
||||
%else
|
||||
push r2
|
||||
push r5
|
||||
push r1
|
||||
call memcpy
|
||||
add esp, 12
|
||||
RET
|
||||
%endif
|
||||
add r1, r2
|
||||
add r5, r2
|
||||
neg r2
|
||||
; nbits >= 2 (FFT4) and sizeof(FFTComplex)=8 => at least 32B
|
||||
.loopcopy:
|
||||
movaps xmm0, [r5 + r2]
|
||||
movaps xmm1, [r5 + r2 + 16]
|
||||
movaps [r1 + r2], xmm0
|
||||
movaps [r1 + r2 + 16], xmm1
|
||||
add r2, 32
|
||||
jl .loopcopy
|
||||
REP_RET
|
||||
|
||||
cglobal imdct_calc, 3,5,3
|
||||
mov r3d, [r0 + FFTContext.mdctsize]
|
||||
|
Loading…
Reference in New Issue
Block a user