From c88e60af76ad6cf3b193a7f160256061b085125e Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Thu, 5 Jul 2012 15:17:39 +0200 Subject: [PATCH] swr/x86: 10l, missed some SSE2 instructions in code marked as SSE. Signed-off-by: Michael Niedermayer --- libswresample/x86/audio_convert.asm | 3 +-- libswresample/x86/swresample_x86.c | 42 +++++++++++++---------------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm index 6fc86f0748..fcf856f942 100644 --- a/libswresample/x86/audio_convert.asm +++ b/libswresample/x86/audio_convert.asm @@ -377,7 +377,7 @@ CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N -INIT_XMM sse +INIT_XMM sse2 CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N @@ -401,7 +401,6 @@ UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N -INIT_XMM sse2 CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c index c9a99a5e51..71806c12bf 100644 --- a/libswresample/x86/swresample_x86.c +++ b/libswresample/x86/swresample_x86.c @@ -49,7 +49,7 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac, } MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx) -MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse) +MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2) if(mm_flags & AV_CPU_FLAG_MMX) { if(channels == 6) { @@ -58,28 +58,6 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse) } } - if(mm_flags & AV_CPU_FLAG_SSE) { - if(channels == 2) { - if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) - ac->simd_f = ff_pack_2ch_int32_to_int32_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S16P) - ac->simd_f = ff_pack_2ch_int16_to_int16_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16P) - ac->simd_f = ff_pack_2ch_int16_to_int32_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32P) - ac->simd_f = ff_pack_2ch_int32_to_int16_a_sse; - - if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32) - ac->simd_f = ff_unpack_2ch_int32_to_int32_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16) - ac->simd_f = ff_unpack_2ch_int16_to_int16_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16) - ac->simd_f = ff_unpack_2ch_int16_to_int32_a_sse; - if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32) - ac->simd_f = ff_unpack_2ch_int32_to_int16_a_sse; - } - } - if(mm_flags & AV_CPU_FLAG_SSE2) { if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_int32_to_float_a_sse2; @@ -91,6 +69,24 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse) ac->simd_f = ff_float_to_int16_a_sse2; if(channels == 2) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_2ch_int32_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_pack_2ch_int16_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_pack_2ch_int16_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_2ch_int32_to_int16_a_sse2; + + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_2ch_int32_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_2ch_int32_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_pack_2ch_int32_to_float_a_sse2; if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)