swr: mix_1_1 int16 MMX

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2012-06-12 16:27:00 +02:00
parent 52afa43691
commit cbeeaf2593
3 changed files with 79 additions and 1 deletions

View File

@ -380,7 +380,7 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
in_i= s->matrix_ch[out_i][1];
if(s->matrix[out_i][in_i]!=1.0){
if(s->mix_1_1_simd && len1)
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_matrix, in->ch_count*out_i + in_i, len1);
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_simd_matrix, in->ch_count*out_i + in_i, len1);
if(len != len1)
s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1);
}else if(mustcopy){

View File

@ -21,6 +21,12 @@
%include "libavutil/x86/x86inc.asm"
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
align 32
dw1: times 8 dd 1
w1 : times 16 dw 1
SECTION .text
%macro MIX2_FLT 1
@ -99,6 +105,63 @@ mix_1_1_float_u_int %+ SUFFIX
REP_RET
%endmacro
; MIX1_INT16 <a|u>
; Emits mix_1_1_<a|u>_int16: scales a run of 16-bit samples from `in` by one
; fixed-point coefficient and writes the saturated 16-bit results to `out`.
;   %1 = a : variant using aligned loads/stores (mova); falls back to the
;            unaligned code when either pointer is not mmsize-aligned.
;   %1 = u : variant using unaligned loads/stores (movu).
; Arguments (named through cglobal): out, in, coeffp, index, len.
; The coefficient entry is the 32-bit value at coeffp[index]; the code below
; uses its low 16 bits as the multiplier and its high 16 bits as the right
; shift applied after the multiply.
; NOTE(review): the loop consumes 2*mmsize bytes per iteration with no tail
; handling and always runs at least once - presumably the caller passes a
; non-zero len that is a multiple of mmsize samples; confirm against callers.
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
; Aligned variant: if in or out is not mmsize-aligned, jump into the body of
; the unaligned variant (the label defined in the %else branch below).
test inq, mmsize-1
jne mix_1_1_int16_u_int %+ SUFFIX
test outq, mmsize-1
jne mix_1_1_int16_u_int %+ SUFFIX
%else
; Unaligned variant: define the label that the aligned variant jumps to.
mix_1_1_int16_u_int %+ SUFFIX
%endif
; Load the 32-bit coefficient entry (4 bytes per index).
movd m4, [coeffpq + 4*indexq]
; m5 = multiplier replicated across words (SPLATW is an x86util macro;
; presumably it broadcasts the low word of m4 - confirm in x86util.asm).
SPLATW m5, m4
; Isolate the high 16 bits of the loaded dword: shift them to the top of the
; 64-bit lane, then logically shift down so m4 holds the zero-extended shift.
psllq m4, 32
psrlq m4, 48
; m0 = (1 << shift) >> 1 in every word, i.e. the rounding term added before
; the final right shift.
mova m0, [w1]
psllw m0, m4
psrlw m0, 1
; Interleave so m5 = {multiplier, rounding, multiplier, rounding, ...}.
punpcklwd m5, m0
; Convert len from samples to bytes (2 bytes each), advance both pointers to
; the end of the data and index backwards with a negative offset that counts
; up towards zero.
add lenq , lenq
add inq , lenq
add outq , lenq
neg lenq
.next:
; Load two vectors of input samples.
mov%1 m0, [inq + lenq ]
mov%1 m2, [inq + lenq + mmsize]
mova m1, m0
mova m3, m2
; Pair every sample with the constant 1: {s0, 1, s1, 1, ...}.
punpcklwd m0, [w1]
punpckhwd m1, [w1]
punpcklwd m2, [w1]
punpckhwd m3, [w1]
; Each 32-bit lane becomes sample*multiplier + 1*rounding.
pmaddwd m0, m5
pmaddwd m1, m5
pmaddwd m2, m5
pmaddwd m3, m5
; Arithmetic right shift by the per-coefficient shift count.
psrad m0, m4
psrad m1, m4
psrad m2, m4
psrad m3, m4
; Narrow back to 16 bits with signed saturation.
packssdw m0, m1
packssdw m2, m3
mov%1 [outq + lenq ], m0
mov%1 [outq + lenq + mmsize], m2
; Advance by the two vectors just processed; loop while the offset is negative.
add lenq, mmsize*2
jl .next
%if mmsize == 8
; MMX registers were used: emms resets the x87/MMX state before returning.
emms
RET
%else
REP_RET
%endif
%endmacro
INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
INIT_XMM sse
MIX2_FLT u
MIX2_FLT a

View File

@ -163,6 +163,21 @@ void swri_rematrix_init_x86(struct SwrContext *s){
s->mix_2_1_simd = NULL;
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
if(mm_flags & AV_CPU_FLAG_MMX) {
s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
}
s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
for(i=0; i<nb_out; i++){
int sh = 0;
for(j=0; j<nb_in; j++)
sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
sh = FFMAX(av_log2(sh) - 14, 0);
for(j=0; j<nb_in; j++) {
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
}
}
} else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
if(mm_flags & AV_CPU_FLAG_SSE) {
s->mix_1_1_simd = ff_mix_1_1_a_float_sse;