Merge commit '08e3ea60ff4059341b74be04a428a38f7c3630b0'

* commit '08e3ea60ff4059341b74be04a428a38f7c3630b0':
  x86: synth filter float: implement SSE2 version

Conflicts:
	libavcodec/x86/dcadsp.asm
	libavcodec/x86/dcadsp_init.c

See: 2cdbcc0048
Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-02-28 20:38:12 +01:00
commit baf3adc621
2 changed files with 41 additions and 43 deletions

View File

@@ -178,7 +178,7 @@ DCA_LFE_FIR 1
INIT_XMM sse2
%macro INNER_LOOP 1
-; reading backwards: ptr1=synth_buf+j+i ptr2=synth_big+j-i
+; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
;~ a += window[i + j] * (-synth_buf[15 - i + j])
;~ b += window[i + j + 16] * (synth_buf[i + j])
pshufd m5, [ptr2 + j + (15 - 3) * 4], q0123
@@ -264,17 +264,15 @@ cglobal synth_filter_inner, 0,6+4*ARCH_X86_64,7+6*ARCH_X86_64, \
mov ptr1, synth_bufm
add win, i
add ptr1, i
-%else
+%else ; ARCH_X86_64
%define ptr1 r6q
%define ptr2 r7q ; must be loaded
%define win r8q
%define j r9q
-%if ARCH_X86_64
pxor m9, m9
pxor m10, m10
mova m7, [buf2 + i + mmsize]
mova m8, [buf2 + i + mmsize + 16 * 4]
-%endif
lea win, [windowq + i]
lea ptr1, [synth_bufq + i]
%endif