lavr: x86: optimized 2-channel flt to s16p conversion

This commit is contained in:
Justin Ruggles 2012-05-02 18:21:26 -04:00
parent 6092dafb5a
commit 31d0d7181d
2 changed files with 55 additions and 0 deletions

View File

@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH
INIT_XMM avx
CONV_S16_TO_FLTP_6CH
%endif
;------------------------------------------------------------------------------
; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len,
;                              int channels);
;
; Converts interleaved 2-channel float samples at src into two separate
; int16_t buffers, dst[0] and dst[1].
;
; Register roles (names come from the cglobal line):
;   dst0q - on entry the dst pointer array; afterwards the end of dst[0]
;   dst1q - end of dst[1]
;   srcq  - end of the interleaved float input
;   lenq  - negative byte offset into each output buffer, counts up to 0
;   m0-m3 - sample data; m4 - scratch passed to DEINT2_PS; m5 - scale factor
;
; The 'channels' argument is never read by this routine.
;
; Requirements visible in the code:
;   - every load and store uses mova (aligned move), so src, dst[0] and
;     dst[1] must be aligned to mmsize
;   - the loop body runs before the first termination test and advances by
;     mmsize output bytes per channel (mmsize/2 samples), so len is expected
;     to be a positive multiple of mmsize/2
;------------------------------------------------------------------------------
%macro CONV_FLT_TO_S16P_2CH 0
; x86inc cglobal: 3 arguments loaded into registers, 4 GPRs and 6 vector
; registers used; the trailing names become the dst0/src/len/dst1 aliases.
cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
lea lenq, [2*lend] ; lenq = 2*len = bytes of int16 output per channel
mov dst1q, [dst0q+gprsize] ; dst1q = dst[1]; must be read before dst0q is overwritten
mov dst0q, [dst0q ] ; dst0q = dst[0]
lea srcq, [srcq+4*lenq] ; srcq = end of input: 4*lenq = 8*len bytes (2 ch * 4-byte float)
add dst0q, lenq ; dst0q = end of dst[0]
add dst1q, lenq ; dst1q = end of dst[1]
neg lenq ; index with a negative offset that rises towards zero
mova m5, [pf_s16_scale] ; scale constant (defined outside this excerpt), kept in m5 for the whole loop
.loop:
; Load 4*mmsize bytes of interleaved input. The output offset is scaled by 4
; because each output byte per channel corresponds to 4 input bytes
; (2 channels, and float is twice the size of int16).
mova m0, [srcq+4*lenq ]
mova m1, [srcq+4*lenq+ mmsize]
mova m2, [srcq+4*lenq+2*mmsize]
mova m3, [srcq+4*lenq+3*mmsize]
; DEINT2_PS is defined outside this excerpt. Presumably it de-interleaves each
; register pair so that m0/m2 end up holding channel 0 and m1/m3 channel 1,
; using m4 as scratch - this matches the pack/store pattern below, but confirm
; against the macro definition.
DEINT2_PS 0, 1, 4
DEINT2_PS 2, 3, 4
; Scale the floats by the constant in m5 before integer conversion.
mulps m0, m0, m5
mulps m1, m1, m5
mulps m2, m2, m5
mulps m3, m3, m5
; Packed float -> packed int32 (rounding follows the current MXCSR mode).
cvtps2dq m0, m0
cvtps2dq m1, m1
cvtps2dq m2, m2
cvtps2dq m3, m3
; Narrow int32 -> int16 with signed saturation; the first operand supplies the
; low half of the result and the second operand the high half.
packssdw m0, m2
packssdw m1, m3
mova [dst0q+lenq], m0 ; store mmsize bytes to dst[0]
mova [dst1q+lenq], m1 ; store mmsize bytes to dst[1]
add lenq, mmsize ; advance the output offset; also sets the flags tested by jl
jl .loop ; keep going while the offset is still negative
; x86inc return macro.
REP_RET
%endmacro
; Instantiate the macro once per instruction set. The C side declares the
; results as ff_conv_flt_to_s16p_2ch_sse2 and ff_conv_flt_to_s16p_2ch_avx.
INIT_XMM sse2
CONV_FLT_TO_S16P_2CH
; The AVX build is also set up with INIT_XMM (not INIT_YMM) and is only
; assembled when HAVE_AVX is set.
%if HAVE_AVX
INIT_XMM avx
CONV_FLT_TO_S16P_2CH
%endif

View File

@ -120,6 +120,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src,
int len, int channels);
/* Assembly implementations of the 2-channel float -> planar s16 conversion;
 * registered below for AV_SAMPLE_FMT_S16P output from AV_SAMPLE_FMT_FLT input
 * (SSE2 and AVX variants). */
extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src,
int len, int channels);
extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
int len, int channels);
av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
#if HAVE_YASM
@ -175,6 +180,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
}
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@ -219,6 +226,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
}
#endif
}