mirror of https://git.ffmpeg.org/ffmpeg.git
audiodsp/x86: yasmify vector_clipf_sse
This commit is contained in:
parent
683da86aab
commit
12004a9a7f
|
@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
|
|||
|
||||
# GCC inline assembly optimizations
|
||||
# subsystems
|
||||
MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
|
||||
MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
|
||||
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
|
||||
x86/hpeldsp_mmx.o
|
||||
|
|
|
@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
|
|||
%else
|
||||
VECTOR_CLIP_INT32 6, 1, 0, 0
|
||||
%endif
|
||||
|
||||
; void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
; int len, float min, float max)
|
||||
; Clamp every float of src to the range [min, max] and store the result in dst.
; m0 holds the lower bound and m1 the upper bound, each broadcast to all lanes;
; where the two scalars are read from depends on the target ABI (branches below).
; NOTE(review): the loop body always runs at least once and lenq is stepped
; down by mmsize until it is <= 0, so len presumably has to be a positive
; multiple of mmsize -- confirm against callers.
; NOTE(review): mova is presumably the aligned move, which would require src
; and dst to be mmsize-aligned -- confirm in x86inc.
INIT_XMM sse
|
||||
cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
|
||||
%if ARCH_X86_32
|
||||
; x86-32: both bounds are read through the minm/maxm argument operands.
VBROADCASTSS m0, minm
|
||||
VBROADCASTSS m1, maxm
|
||||
%elif WIN64
|
||||
; Win64: min is taken from m3 and max from maxm (presumably min arrives in
; xmm3 and max is passed in memory -- confirm against the calling convention).
VBROADCASTSS m0, m3
|
||||
VBROADCASTSS m1, maxm
|
||||
%else ; 64bit sysv
|
||||
; Presumably min and max already arrive in xmm0/xmm1 here, so each register
; is simply broadcast onto itself -- confirm against the calling convention.
VBROADCASTSS m0, m0
|
||||
VBROADCASTSS m1, m1
|
||||
%endif
|
||||
|
||||
; Presumably sign-extends the 32-bit len into lenq when they are not already
; the same register -- confirm in x86inc.
movsxdifnidn lenq, lend
|
||||
|
||||
; NOTE(review): the label below has no trailing colon; assemblers may warn
; about a colon-less label alone on a line -- consider writing ".loop:".
.loop
|
||||
; Load the four consecutive vectors that end at byte offset 4 * lenq in src.
mova m2, [srcq + 4 * lenq - 4 * mmsize]
|
||||
mova m3, [srcq + 4 * lenq - 3 * mmsize]
|
||||
mova m4, [srcq + 4 * lenq - 2 * mmsize]
|
||||
mova m5, [srcq + 4 * lenq - 1 * mmsize]
|
||||
|
||||
; Raise every lane that is below the lower bound (m0).
maxps m2, m0
|
||||
maxps m3, m0
|
||||
maxps m4, m0
|
||||
maxps m5, m0
|
||||
|
||||
; Lower every lane that is above the upper bound (m1).
minps m2, m1
|
||||
minps m3, m1
|
||||
minps m4, m1
|
||||
minps m5, m1
|
||||
|
||||
; Store the clamped vectors at the same offsets in dst.
mova [dstq + 4 * lenq - 4 * mmsize], m2
|
||||
mova [dstq + 4 * lenq - 3 * mmsize], m3
|
||||
mova [dstq + 4 * lenq - 2 * mmsize], m4
|
||||
mova [dstq + 4 * lenq - 1 * mmsize], m5
|
||||
|
||||
; Step back by mmsize elements, i.e. 4 * mmsize bytes, which matches the four
; vectors just processed; keep looping while elements remain (lenq > 0).
sub lenq, mmsize
|
||||
jg .loop
|
||||
|
||||
RET
|
||||
|
|
|
@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
|
|||
if (EXTERNAL_MMXEXT(cpu_flags))
|
||||
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
|
||||
|
||||
if (INLINE_SSE(cpu_flags))
|
||||
if (EXTERNAL_SSE(cpu_flags))
|
||||
c->vector_clipf = ff_vector_clipf_sse;
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
/*
|
||||
* This file is part of Libav.
|
||||
*
|
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* Libav is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "audiodsp.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
/*
 * Clamp every element of src to the range [min, max] and write the result
 * to dst, using SSE inline assembly.
 *
 * The buffers are walked from the last group of 16 floats back to the first:
 * each iteration handles 64 bytes and the byte offset i is decreased by 64
 * until it becomes negative.
 *
 * NOTE(review): the loop body runs before any check, so len < 16 would access
 * memory in front of the buffers; len presumably has to be a positive
 * multiple of 16 -- confirm against callers.
 * NOTE(review): movaps is the aligned move, so src and dst presumably have to
 * be 16-byte aligned -- confirm against callers.
 * NOTE(review): xmm0-xmm5 are written by the asm but only "memory" is listed
 * as a clobber.
 */
void ff_vector_clipf_sse(float *dst, const float *src,
|
||||
int len, float min, float max)
|
||||
{
|
||||
// Byte offset of the last 16-element group (4 bytes per float).
x86_reg i = (len - 16) * 4;
|
||||
__asm__ volatile (
|
||||
// Operands: %0 = i, %1 = dst, %2 = src, %3 = min, %4 = max.
// Broadcast min into all lanes of xmm4 and max into all lanes of xmm5.
"movss %3, %%xmm4 \n\t"
|
||||
"movss %4, %%xmm5 \n\t"
|
||||
"shufps $0, %%xmm4, %%xmm4 \n\t"
|
||||
"shufps $0, %%xmm5, %%xmm5 \n\t"
|
||||
"1: \n\t"
|
||||
// Load 64 bytes (four vectors) from src + i.
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
||||
"movaps 16(%2, %0), %%xmm1 \n\t"
|
||||
"movaps 32(%2, %0), %%xmm2 \n\t"
|
||||
"movaps 48(%2, %0), %%xmm3 \n\t"
|
||||
// Raise lanes below the lower bound held in xmm4.
"maxps %%xmm4, %%xmm0 \n\t"
|
||||
"maxps %%xmm4, %%xmm1 \n\t"
|
||||
"maxps %%xmm4, %%xmm2 \n\t"
|
||||
"maxps %%xmm4, %%xmm3 \n\t"
|
||||
// Lower lanes above the upper bound held in xmm5.
"minps %%xmm5, %%xmm0 \n\t"
|
||||
"minps %%xmm5, %%xmm1 \n\t"
|
||||
"minps %%xmm5, %%xmm2 \n\t"
|
||||
"minps %%xmm5, %%xmm3 \n\t"
|
||||
// Store the clamped vectors to dst + i.
"movaps %%xmm0, (%1, %0) \n\t"
|
||||
"movaps %%xmm1, 16(%1, %0) \n\t"
|
||||
"movaps %%xmm2, 32(%1, %0) \n\t"
|
||||
"movaps %%xmm3, 48(%1, %0) \n\t"
|
||||
// Move to the previous 64-byte group; repeat while the offset is >= 0.
"sub $64, %0 \n\t"
|
||||
"jge 1b \n\t"
|
||||
: "+&r" (i)
|
||||
: "r" (dst), "r" (src), "m" (min), "m" (max)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif /* HAVE_INLINE_ASM */
|
Loading…
Reference in New Issue