mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-20 14:20:51 +00:00
fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
This commit is contained in:
parent
4e8e262476
commit
aad3429d4e
@ -112,6 +112,58 @@ FLOAT_TO_INT16 3dnow, 0
|
||||
%undef cvtps2pi
|
||||
|
||||
|
||||
;-------------------------------------------------------------------------------
|
||||
; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len);
|
||||
;-------------------------------------------------------------------------------
|
||||
%macro FLOAT_TO_INT16_INTERLEAVE2 1
; Emits float_to_int16_interleave2_%1: converts two separate float arrays
; (src[0] and src[1]) to int16 and writes them interleaved to dst.
; %1 is the instruction-set suffix used in the function name; "sse2" selects
; the cvtps2dq path below, any other value selects the cvtps2pi path.
|
||||
; 3 arguments (dst, src, len) but 4 named registers: on entry the len
; argument still sits in r2, which is named src1 here.
; NOTE(review): assumes cglobal maps the names to r0..r3 in order - confirm.
cglobal float_to_int16_interleave2_%1, 3,4,2, dst, src0, src1, len
|
||||
; lenq = 4 * len: byte size of one channel's float input, which is also the
; byte size of the interleaved output (2 channels * 2 bytes per sample).
lea lenq, [4*r2q]
|
||||
; src1q = src[1]; this overwrites r2, whose len value was consumed above.
mov src1q, [src0q+gprsize]
|
||||
; src0q = src[0]
mov src0q, [src0q]
|
||||
; Move all three pointers to the end of their buffers so that one negative
; offset (lenq) can index input and output alike.
add dstq, lenq
|
||||
add src0q, lenq
|
||||
add src1q, lenq
|
||||
; lenq now counts from -4*len up toward 0.
neg lenq
|
||||
.loop:
|
||||
%ifidn %1, sse2
|
||||
; 4 floats from each channel -> 4 dwords each
cvtps2dq m0, [src0q+lenq]
|
||||
cvtps2dq m1, [src1q+lenq]
|
||||
; m0 = channel-0 samples in the low 4 words, channel-1 samples in the high
; 4 words, narrowed with signed saturation.
packssdw m0, m1
|
||||
; copy the high half of m0 (channel-1 words) into the low half of m1
movhlps m1, m0
|
||||
; interleave the low words: ch0[0], ch1[0], ch0[1], ch1[1], ...
punpcklwd m0, m1
|
||||
; store 8 interleaved int16 values (16 bytes)
mova [dstq+lenq], m0
|
||||
%else
|
||||
; cvtps2pi converts 2 floats at a time, so each channel needs two loads.
cvtps2pi m0, [src0q+lenq ]
|
||||
cvtps2pi m1, [src0q+lenq+8]
|
||||
cvtps2pi m2, [src1q+lenq ]
|
||||
cvtps2pi m3, [src1q+lenq+8]
|
||||
; m0 = 4 int16 samples of channel 0
packssdw m0, m1
|
||||
; m2 = 4 int16 samples of channel 1
packssdw m2, m3
|
||||
; keep a copy of channel 0: punpcklwd below overwrites m0
mova m1, m0
|
||||
; m0 = first two (ch0, ch1) sample pairs
punpcklwd m0, m2
|
||||
; m1 = last two (ch0, ch1) sample pairs
punpckhwd m1, m2
|
||||
mova [dstq+lenq ], m0
|
||||
mova [dstq+lenq+8], m1
|
||||
%endif
|
||||
; Advance 16 bytes (4 samples per channel); keep looping while the offset is
; still negative.
; NOTE(review): the body runs before the check and there is no tail handling,
; so len is presumably a non-zero multiple of 4 - confirm with callers.
add lenq, 16
|
||||
js .loop
|
||||
%ifnidn %1, sse2
|
||||
; the non-sse2 path used MMX registers, so emms is issued before returning
emms
|
||||
%endif
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
; Instantiate the 2-channel interleave routine once per instruction set.
|
||||
; For the 3dnow instance, cvtps2pi inside the macro body is replaced by pf2id.
%define cvtps2pi pf2id
|
||||
FLOAT_TO_INT16_INTERLEAVE2 3dnow
|
||||
%undef cvtps2pi
|
||||
; NOTE(review): the FLOAT_TO_INT16_INTERLEAVE2 body does not spell movdqa
; directly (it uses mova) - confirm whether this define is needed here.
%define movdqa movaps
|
||||
FLOAT_TO_INT16_INTERLEAVE2 sse
|
||||
%undef movdqa
|
||||
INIT_XMM
|
||||
FLOAT_TO_INT16_INTERLEAVE2 sse2
|
||||
|
||||
|
||||
%macro PSWAPD_SSE 2
; PSWAPD_SSE dst, src: word shuffle with selector 0x4e (binary 01 00 11 10),
; which picks source words 2,3,0,1 - i.e. the two dwords of src swapped.
|
||||
pshufw %1, %2, 0x4e
|
||||
%endmacro
|
||||
|
@ -35,13 +35,17 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
|
||||
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
|
||||
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
|
||||
|
||||
void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
|
||||
void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
|
||||
void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
|
||||
|
||||
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
|
||||
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
|
||||
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
|
||||
|
||||
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
|
||||
|
||||
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
|
||||
#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
|
||||
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
|
||||
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
|
||||
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
|
||||
@ -57,71 +61,16 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
|
||||
if(channels==1)\
|
||||
ff_float_to_int16_##cpu(dst, src[0], len);\
|
||||
else if(channels==2){\
|
||||
x86_reg reglen = len; \
|
||||
const float *src0 = src[0];\
|
||||
const float *src1 = src[1];\
|
||||
__asm__ volatile(\
|
||||
"shl $2, %0 \n"\
|
||||
"add %0, %1 \n"\
|
||||
"add %0, %2 \n"\
|
||||
"add %0, %3 \n"\
|
||||
"neg %0 \n"\
|
||||
body\
|
||||
:"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
|
||||
);\
|
||||
ff_float_to_int16_interleave2_##cpu(dst, src, len);\
|
||||
}else if(channels==6){\
|
||||
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
|
||||
}else\
|
||||
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
|
||||
}
|
||||
|
||||
FLOAT_TO_INT16_INTERLEAVE(3dnow,
|
||||
"1: \n"
|
||||
"pf2id (%2,%0), %%mm0 \n"
|
||||
"pf2id 8(%2,%0), %%mm1 \n"
|
||||
"pf2id (%3,%0), %%mm2 \n"
|
||||
"pf2id 8(%3,%0), %%mm3 \n"
|
||||
"packssdw %%mm1, %%mm0 \n"
|
||||
"packssdw %%mm3, %%mm2 \n"
|
||||
"movq %%mm0, %%mm1 \n"
|
||||
"punpcklwd %%mm2, %%mm0 \n"
|
||||
"punpckhwd %%mm2, %%mm1 \n"
|
||||
"movq %%mm0, (%1,%0)\n"
|
||||
"movq %%mm1, 8(%1,%0)\n"
|
||||
"add $16, %0 \n"
|
||||
"js 1b \n"
|
||||
"femms \n"
|
||||
)
|
||||
|
||||
FLOAT_TO_INT16_INTERLEAVE(sse,
|
||||
"1: \n"
|
||||
"cvtps2pi (%2,%0), %%mm0 \n"
|
||||
"cvtps2pi 8(%2,%0), %%mm1 \n"
|
||||
"cvtps2pi (%3,%0), %%mm2 \n"
|
||||
"cvtps2pi 8(%3,%0), %%mm3 \n"
|
||||
"packssdw %%mm1, %%mm0 \n"
|
||||
"packssdw %%mm3, %%mm2 \n"
|
||||
"movq %%mm0, %%mm1 \n"
|
||||
"punpcklwd %%mm2, %%mm0 \n"
|
||||
"punpckhwd %%mm2, %%mm1 \n"
|
||||
"movq %%mm0, (%1,%0)\n"
|
||||
"movq %%mm1, 8(%1,%0)\n"
|
||||
"add $16, %0 \n"
|
||||
"js 1b \n"
|
||||
"emms \n"
|
||||
)
|
||||
|
||||
FLOAT_TO_INT16_INTERLEAVE(sse2,
|
||||
"1: \n"
|
||||
"cvtps2dq (%2,%0), %%xmm0 \n"
|
||||
"cvtps2dq (%3,%0), %%xmm1 \n"
|
||||
"packssdw %%xmm1, %%xmm0 \n"
|
||||
"movhlps %%xmm0, %%xmm1 \n"
|
||||
"punpcklwd %%xmm1, %%xmm0 \n"
|
||||
"movdqa %%xmm0, (%1,%0) \n"
|
||||
"add $16, %0 \n"
|
||||
"js 1b \n"
|
||||
)
|
||||
FLOAT_TO_INT16_INTERLEAVE(3dnow)
|
||||
FLOAT_TO_INT16_INTERLEAVE(sse)
|
||||
FLOAT_TO_INT16_INTERLEAVE(sse2)
|
||||
|
||||
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
|
||||
if(channels==6)
|
||||
|
Loading…
Reference in New Issue
Block a user