mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-17 13:04:50 +00:00
x86: fft: fix imdct_half() for AVX
Some calculations were changed in b6a3849
to use mmsize, which was not correct
for the AVX version: it uses INIT_YMM and therefore has mmsize == 32,
so mmsize/4 evaluates to 8 instead of the intended 4.
Fixes Bug 341.
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
This commit is contained in:
parent
150adea6da
commit
c728518b3c
@@ -1009,7 +1009,11 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
|
||||
push rrevtab
|
||||
%endif
|
||||
|
||||
sub r3, mmsize/4
|
||||
%if mmsize == 8
|
||||
sub r3, 2
|
||||
%else
|
||||
sub r3, 4
|
||||
%endif
|
||||
%if ARCH_X86_64 || mmsize == 8
|
||||
xor r4, r4
|
||||
sub r4, r3
|
||||
@@ -1036,7 +1040,9 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
|
||||
mova [r1+r5*8], m0
|
||||
mova [r1+r6*8], m2
|
||||
add r4, 2
|
||||
%elif ARCH_X86_64
|
||||
sub r4, 2
|
||||
%else
|
||||
%if ARCH_X86_64
|
||||
movzx r5, word [rrevtab+r4-4]
|
||||
movzx r6, word [rrevtab+r4-2]
|
||||
movzx r10, word [rrevtab+r3]
|
||||
@@ -1057,7 +1063,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
|
||||
movlps [r1+r5*8], xmm1
|
||||
movhps [r1+r4*8], xmm1
|
||||
%endif
|
||||
sub r3, mmsize/4
|
||||
sub r3, 4
|
||||
%endif
|
||||
jns .pre
|
||||
|
||||
mov r5, r0
|
||||
|
Loading…
Reference in New Issue
Block a user