mirror of https://git.ffmpeg.org/ffmpeg.git
swr: mix_1_1 int16 MMX
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
52afa43691
commit
cbeeaf2593
|
@ -380,7 +380,7 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus
|
|||
in_i= s->matrix_ch[out_i][1];
|
||||
if(s->matrix[out_i][in_i]!=1.0){
|
||||
if(s->mix_1_1_simd && len1)
|
||||
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_matrix, in->ch_count*out_i + in_i, len1);
|
||||
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_simd_matrix, in->ch_count*out_i + in_i, len1);
|
||||
if(len != len1)
|
||||
s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1);
|
||||
}else if(mustcopy){
|
||||
|
|
|
@ -21,6 +21,12 @@
|
|||
%include "libavutil/x86/x86inc.asm"
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 32
|
||||
dw1: times 8 dd 1
|
||||
w1 : times 16 dw 1
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro MIX2_FLT 1
|
||||
|
@ -99,6 +105,63 @@ mix_1_1_float_u_int %+ SUFFIX
|
|||
REP_RET
|
||||
%endmacro
|
||||
|
||||
; MIX1_INT16 <a|u>
; Emits mix_1_1_<a|u>_int16: scales a buffer of 16-bit samples by a single
; fixed-point coefficient, with rounding and signed saturation.
;   out    - destination samples
;   in     - source samples
;   coeffp - table of 32-bit entries; entry [index] is read below
;   index  - entry number inside coeffp (scaled by 4 bytes)
;   len    - number of 16-bit samples (doubled below to get a byte count)
; %1 selects the variant: "a" first tests both pointers against mmsize-1 and
; jumps to the label emitted by the "u" variant when either test is non-zero;
; "u" emits that label. %1 is also pasted into mov%1 for the sample
; loads/stores (presumably expanding to mova / movu - defined outside this
; file section).
; NOTE(review): the loop body runs at least once and consumes 2*mmsize bytes
; per iteration, so len is presumably a non-zero multiple of mmsize samples -
; confirm against the caller.
%macro MIX1_INT16 1
|
||||
; NOTE(review): "5, 5, 6" is presumably x86inc's args / GPRs / vector-register
; counts - confirm against x86inc.asm.
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
|
||||
%ifidn %1, a
|
||||
; "a" variant: take the "u" code path unless both in and out have their low
; (mmsize-1) address bits clear.
test inq, mmsize-1
|
||||
jne mix_1_1_int16_u_int %+ SUFFIX
|
||||
test outq, mmsize-1
|
||||
jne mix_1_1_int16_u_int %+ SUFFIX
|
||||
%else
|
||||
; "u" variant: entry label that the "a" variant jumps to.
mix_1_1_int16_u_int %+ SUFFIX
|
||||
%endif
|
||||
; Load the 32-bit table entry: low word is used as the coefficient, high word
; as the shift count (see the psllq/psrlq pair below).
movd m4, [coeffpq + 4*indexq]
|
||||
; NOTE(review): SPLATW presumably broadcasts the low word (the coefficient)
; of m4 into every word of m5 - macro is defined in x86util.asm.
SPLATW m5, m4
|
||||
; Shift left by 32 then right by 48: leaves bits 16..31 of the loaded entry
; (its high word) in the low bits of m4 = shift count.
psllq m4, 32
|
||||
psrlq m4, 48
|
||||
; Build the rounding term in every word of m0: (1 << shift) >> 1.
mova m0, [w1]
|
||||
psllw m0, m4
|
||||
psrlw m0, 1
|
||||
; Interleave coefficient words with rounding words so that m5 holds
; (coefficient, rounding) pairs for pmaddwd.
punpcklwd m5, m0
|
||||
; len *= 2 (bytes per sample), move both pointers to the end of their buffers
; and negate len so the loop can count a negative offset up towards zero.
add lenq , lenq
|
||||
add inq , lenq
|
||||
add outq , lenq
|
||||
neg lenq
|
||||
.next:
|
||||
; Load two registers' worth of input samples.
mov%1 m0, [inq + lenq ]
|
||||
mov%1 m2, [inq + lenq + mmsize]
|
||||
mova m1, m0
|
||||
mova m3, m2
|
||||
; Interleave each sample with the constant 1 from w1, giving (sample, 1)
; word pairs in four registers (low and high halves of both loads).
punpcklwd m0, [w1]
|
||||
punpckhwd m1, [w1]
|
||||
punpcklwd m2, [w1]
|
||||
punpckhwd m3, [w1]
|
||||
; Per dword: sample * coefficient + 1 * rounding.
pmaddwd m0, m5
|
||||
pmaddwd m1, m5
|
||||
pmaddwd m2, m5
|
||||
pmaddwd m3, m5
|
||||
; Arithmetic right shift by the shift count held in m4.
psrad m0, m4
|
||||
psrad m1, m4
|
||||
psrad m2, m4
|
||||
psrad m3, m4
|
||||
; Pack the dword results back to words with signed saturation.
packssdw m0, m1
|
||||
packssdw m2, m3
|
||||
mov%1 [outq + lenq ], m0
|
||||
mov%1 [outq + lenq + mmsize], m2
|
||||
; Advance by the two registers just processed; loop while the offset is
; still negative.
add lenq, mmsize*2
|
||||
jl .next
|
||||
%if mmsize == 8
|
||||
; 8-byte (MMX) registers: execute emms before returning.
emms
|
||||
RET
|
||||
%else
|
||||
REP_RET
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
MIX1_INT16 u
|
||||
MIX1_INT16 a
|
||||
|
||||
INIT_XMM sse
|
||||
MIX2_FLT u
|
||||
MIX2_FLT a
|
||||
|
|
|
@ -163,6 +163,21 @@ void swri_rematrix_init_x86(struct SwrContext *s){
|
|||
s->mix_2_1_simd = NULL;
|
||||
|
||||
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
|
||||
if(mm_flags & AV_CPU_FLAG_MMX) {
|
||||
s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
|
||||
}
|
||||
s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
|
||||
for(i=0; i<nb_out; i++){
|
||||
int sh = 0;
|
||||
for(j=0; j<nb_in; j++)
|
||||
sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
|
||||
sh = FFMAX(av_log2(sh) - 14, 0);
|
||||
for(j=0; j<nb_in; j++) {
|
||||
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
|
||||
((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
|
||||
((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
|
||||
}
|
||||
}
|
||||
} else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
|
||||
if(mm_flags & AV_CPU_FLAG_SSE) {
|
||||
s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
|
||||
|
|
Loading…
Reference in New Issue