;****************************************************************************** ;* Copyright (c) 2012 Michael Niedermayer ;* ;* This file is part of FFmpeg. ;* ;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* ;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public ;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** %include "libavutil/x86/x86inc.asm" %include "libavutil/x86/x86util.asm" SECTION_RODATA align 32 flt2pm31: times 8 dd 4.6566129e-10 flt2p31 : times 8 dd 2147483648.0 flt2p15 : times 8 dd 32768.0 SECTION .text %macro INT16_TO_INT32 1 cglobal int16_to_int32_%1, 3, 3, 3, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne int16_to_int32_u_int %+ SUFFIX test srcq, mmsize-1 jne int16_to_int32_u_int %+ SUFFIX %else int16_to_int32_u_int %+ SUFFIX %endif add dstq, lenq shr lenq, 1 add srcq, lenq neg lenq .next mov%1 m2, [srcq+lenq] pxor m0, m0 pxor m1, m1 punpcklwd m0, m2 punpckhwd m1, m2 mov%1 [ dstq+2*lenq], m0 mov%1 [mmsize + dstq+2*lenq], m1 add lenq, mmsize jl .next %if mmsize == 8 emms %endif REP_RET %endmacro %macro INT32_TO_FLOAT 1 cglobal int32_to_float_%1, 3, 3, 3, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne int32_to_float_u_int %+ SUFFIX test srcq, mmsize-1 jne int32_to_float_u_int %+ SUFFIX %else int32_to_float_u_int %+ SUFFIX %endif add srcq, lenq add dstq, lenq neg lenq mova m2, [flt2pm31] .next: %ifidn %1, a cvtdq2ps m0, [ srcq+lenq] cvtdq2ps m1, [mmsize + srcq+lenq] %else movu m0, [ srcq+lenq] movu m1, [mmsize + srcq+lenq] cvtdq2ps m0, m0 cvtdq2ps m1, m1 %endif mulps m0, m0, m2 mulps m1, m1, m2 mov%1 [ dstq+lenq], m0 mov%1 [mmsize + dstq+lenq], m1 add lenq, 2*mmsize jl .next REP_RET %endmacro %macro INT16_TO_FLOAT 1 cglobal int16_to_float_%1, 3, 3, 4, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne int16_to_float_u_int %+ SUFFIX test srcq, mmsize-1 jne int16_to_float_u_int %+ SUFFIX %else int16_to_float_u_int %+ SUFFIX %endif add dstq, lenq shr lenq, 1 add srcq, lenq neg lenq mova m3, [flt2pm31] .next: mov%1 m2, [srcq+lenq] pxor m0, m0 pxor m1, m1 punpcklwd m0, m2 punpckhwd m1, m2 cvtdq2ps m0, m0 cvtdq2ps m1, m1 mulps m0, m3 mulps m1, m3 mov%1 [ dstq+2*lenq], m0 mov%1 [mmsize + dstq+2*lenq], m1 add lenq, mmsize jl .next REP_RET %endmacro %macro FLOAT_TO_INT32 1 cglobal float_to_int32_%1, 3, 3, 5, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne float_to_int32_u_int %+ SUFFIX test srcq, mmsize-1 jne float_to_int32_u_int %+ SUFFIX %else float_to_int32_u_int %+ SUFFIX %endif add srcq, lenq add dstq, lenq neg lenq mova m2, [flt2p31] .next: mov%1 m0, [ srcq+lenq] mov%1 m1, [mmsize + srcq+lenq] mulps m0, m2 mulps m1, m2 cvtps2dq m3, m0 cvtps2dq m4, m1 cmpnltps m0, m2 cmpnltps m1, m2 paddd m0, m3 paddd m1, m4 mov%1 [ dstq+lenq], m0 mov%1 [mmsize + dstq+lenq], m1 add lenq, 2*mmsize jl .next REP_RET %endmacro %macro FLOAT_TO_INT16 1 cglobal float_to_int16_%1, 3, 3, 3, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne float_to_int16_u_int %+ SUFFIX test srcq, mmsize-1 jne float_to_int16_u_int %+ SUFFIX %else float_to_int16_u_int %+ SUFFIX %endif lea srcq, [srcq + 2*lenq] add dstq, lenq neg lenq mova m2, [flt2p15] .next: mov%1 m0, [ srcq+2*lenq] mov%1 m1, [mmsize + srcq+2*lenq] mulps m0, m2 mulps m1, m2 cvtps2dq m0, m0 cvtps2dq m1, m1 packssdw m0, m1 mov%1 [ dstq+lenq], m0 add lenq, mmsize jl .next REP_RET %endmacro %macro INT32_TO_INT16 1 cglobal int32_to_int16_%1, 3, 3, 2, dst, src, len mov srcq, [srcq] mov dstq, [dstq] %ifidn %1, a test dstq, mmsize-1 jne int32_to_int16_u_int %+ SUFFIX test srcq, mmsize-1 jne int32_to_int16_u_int %+ SUFFIX %else int32_to_int16_u_int %+ SUFFIX %endif lea srcq, [srcq + 2*lenq] add dstq, lenq neg lenq .next: mov%1 m0, [ srcq+2*lenq] mov%1 m1, [mmsize + srcq+2*lenq] psrad m0, 16 psrad m1, 16 packssdw m0, m1 mov%1 [ dstq+lenq], m0 add lenq, mmsize jl .next REP_RET %endmacro INIT_MMX mmx INT16_TO_INT32 u INT16_TO_INT32 a INT32_TO_INT16 u INT32_TO_INT16 a INIT_XMM sse INT16_TO_INT32 u INT16_TO_INT32 a INT32_TO_INT16 u INT32_TO_INT16 a INIT_XMM sse2 INT32_TO_FLOAT u INT32_TO_FLOAT a INT16_TO_FLOAT u INT16_TO_FLOAT a FLOAT_TO_INT32 u FLOAT_TO_INT32 a FLOAT_TO_INT16 u FLOAT_TO_INT16 a %if HAVE_AVX INIT_YMM avx INT32_TO_FLOAT u INT32_TO_FLOAT a %endif