From 43717469f9daa402f6acb48997255827a56034e9 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 28 Oct 2015 15:38:20 +0100 Subject: [PATCH] ac3dsp: Reverse matrix in/out order in downmix() Also use (float **) instead of (float (*)[2]). This matches the matrix layout in libavresample so we can reuse assembly code between the two. Signed-off-by: Diego Biurrun --- libavcodec/ac3dec.c | 40 ++++++++++++++++++++++++------------ libavcodec/ac3dec.h | 2 +- libavcodec/ac3dsp.c | 8 ++++---- libavcodec/ac3dsp.h | 2 +- libavcodec/x86/ac3dsp_init.c | 36 +++++++++++++++++--------------- 5 files changed, 52 insertions(+), 36 deletions(-) diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index 9afc54d18d..aba31195fb 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -332,47 +332,57 @@ static int parse_frame_header(AC3DecodeContext *s) * Set stereo downmixing coefficients based on frame header info. * reference: Section 7.8.2 Downmixing Into Two Channels */ -static void set_downmix_coeffs(AC3DecodeContext *s) +static int set_downmix_coeffs(AC3DecodeContext *s) { int i; float cmix = gain_levels[s-> center_mix_level]; float smix = gain_levels[s->surround_mix_level]; float norm0, norm1; + if (!s->downmix_coeffs[0]) { + s->downmix_coeffs[0] = av_malloc(2 * AC3_MAX_CHANNELS * + sizeof(**s->downmix_coeffs)); + if (!s->downmix_coeffs[0]) + return AVERROR(ENOMEM); + s->downmix_coeffs[1] = s->downmix_coeffs[0] + AC3_MAX_CHANNELS; + } + for (i = 0; i < s->fbw_channels; i++) { - s->downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]]; - s->downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]]; + s->downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]]; + s->downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]]; } if (s->channel_mode > 1 && s->channel_mode & 1) { - s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix; + s->downmix_coeffs[0][1] = s->downmix_coeffs[1][1] = cmix; } if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) { int nf = s->channel_mode - 2; - s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB; + s->downmix_coeffs[0][nf] = s->downmix_coeffs[1][nf] = smix * LEVEL_MINUS_3DB; } if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) { int nf = s->channel_mode - 4; - s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf+1][1] = smix; + s->downmix_coeffs[0][nf] = s->downmix_coeffs[1][nf+1] = smix; } /* renormalize */ norm0 = norm1 = 0.0; for (i = 0; i < s->fbw_channels; i++) { - norm0 += s->downmix_coeffs[i][0]; - norm1 += s->downmix_coeffs[i][1]; + norm0 += s->downmix_coeffs[0][i]; + norm1 += s->downmix_coeffs[1][i]; } norm0 = 1.0f / norm0; norm1 = 1.0f / norm1; for (i = 0; i < s->fbw_channels; i++) { - s->downmix_coeffs[i][0] *= norm0; - s->downmix_coeffs[i][1] *= norm1; + s->downmix_coeffs[0][i] *= norm0; + s->downmix_coeffs[1][i] *= norm1; } if (s->output_mode == AC3_CHMODE_MONO) { for (i = 0; i < s->fbw_channels; i++) - s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] + - s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB; + s->downmix_coeffs[0][i] = (s->downmix_coeffs[0][i] + + s->downmix_coeffs[1][i]) * LEVEL_MINUS_3DB; } + + return 0; } /** @@ -1447,7 +1457,10 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, /* set downmixing coefficients if needed */ if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) && s->fbw_channels == s->out_channels)) { - set_downmix_coeffs(s); + if ((ret = set_downmix_coeffs(s)) < 0) { + av_log(avctx, AV_LOG_ERROR, "error setting downmix coeffs\n"); + return ret; + } } } else if (!s->channels) { av_log(avctx, AV_LOG_ERROR, "unable to determine channel mode\n"); @@ -1566,6 +1579,7 @@ static av_cold int ac3_decode_end(AVCodecContext *avctx) AC3DecodeContext *s = avctx->priv_data; ff_mdct_end(&s->imdct_512); ff_mdct_end(&s->imdct_256); + av_freep(&s->downmix_coeffs[0]); return 0; } diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index 4c5359cb87..4a7e281932 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -147,7 +147,7 @@ typedef struct AC3DecodeContext { int fbw_channels; ///< number of full-bandwidth channels int channels; ///< number of total channels int lfe_ch; ///< index of LFE channel - float downmix_coeffs[AC3_MAX_CHANNELS][2]; ///< stereo downmix coefficients + float *downmix_coeffs[2]; ///< stereo downmix coefficients int downmixed; ///< indicates if coeffs are currently downmixed int output_mode; ///< output channel configuration int out_channels; ///< number of output channels diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 38c35b187a..d1bf37e943 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -171,7 +171,7 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs) } } -static void ac3_downmix_c(float **samples, float (*matrix)[2], +static void ac3_downmix_c(float **samples, float **matrix, int out_ch, int in_ch, int len) { int i, j; @@ -180,8 +180,8 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2], for (i = 0; i < len; i++) { v0 = v1 = 0.0f; for (j = 0; j < in_ch; j++) { - v0 += samples[j][i] * matrix[j][0]; - v1 += samples[j][i] * matrix[j][1]; + v0 += samples[j][i] * matrix[0][j]; + v1 += samples[j][i] * matrix[1][j]; } samples[0][i] = v0; samples[1][i] = v1; @@ -190,7 +190,7 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2], for (i = 0; i < len; i++) { v0 = 0.0f; for (j = 0; j < in_ch; j++) - v0 += samples[j][i] * matrix[j][0]; + v0 += samples[j][i] * matrix[0][j]; samples[0][i] = v0; } } diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index 6ca0c5b8e8..cdce21a865 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -126,7 +126,7 @@ typedef struct AC3DSPContext { void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs); - void (*downmix)(float **samples, float (*matrix)[2], int out_ch, + void (*downmix)(float **samples, float **matrix, int out_ch, int in_ch, int len); /** diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c index 89044f4088..9036389296 100644 --- a/libavcodec/x86/ac3dsp_init.c +++ b/libavcodec/x86/ac3dsp_init.c @@ -71,8 +71,8 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, #define MIX5(mono, stereo) \ __asm__ volatile ( \ "movss 0(%1), %%xmm5 \n" \ - "movss 8(%1), %%xmm6 \n" \ - "movss 24(%1), %%xmm7 \n" \ + "movss 4(%1), %%xmm6 \n" \ + "movss 12(%1), %%xmm7 \n" \ "shufps $0, %%xmm5, %%xmm5 \n" \ "shufps $0, %%xmm6, %%xmm6 \n" \ "shufps $0, %%xmm7, %%xmm7 \n" \ @@ -97,7 +97,7 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, "add $16, %0 \n" \ "jl 1b \n" \ : "+&r"(i) \ - : "r"(matrix), \ + : "r"(matrix[0]), \ "r"(samples[0] + len), \ "r"(samples[1] + len), \ "r"(samples[2] + len), \ @@ -141,22 +141,22 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, : "memory" \ ); -static void ac3_downmix_sse(float **samples, float (*matrix)[2], +static void ac3_downmix_sse(float **samples, float **matrix, int out_ch, int in_ch, int len) { - int (*matrix_cmp)[2] = (int(*)[2])matrix; + int **matrix_cmp = (int **)matrix; intptr_t i, j, k, m; i = -len * sizeof(float); if (in_ch == 5 && out_ch == 2 && - !(matrix_cmp[0][1] | matrix_cmp[2][0] | - matrix_cmp[3][1] | matrix_cmp[4][0] | - (matrix_cmp[1][0] ^ matrix_cmp[1][1]) | - (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) { + !(matrix_cmp[1][0] | matrix_cmp[0][2] | + matrix_cmp[1][3] | matrix_cmp[0][4] | + (matrix_cmp[0][1] ^ matrix_cmp[1][1]) | + (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) { MIX5(IF0, IF1); } else if (in_ch == 5 && out_ch == 1 && - matrix_cmp[0][0] == matrix_cmp[2][0] && - matrix_cmp[3][0] == matrix_cmp[4][0]) { + matrix_cmp[0][0] == matrix_cmp[0][2] && + matrix_cmp[0][3] == matrix_cmp[0][4]) { MIX5(IF1, IF0); } else { DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4]; @@ -166,18 +166,20 @@ static void ac3_downmix_sse(float **samples, float (*matrix)[2], samp[j] = samples[j] + len; j = 2 * in_ch * sizeof(float); + k = in_ch * sizeof(float); __asm__ volatile ( "1: \n" + "sub $4, %1 \n" "sub $8, %0 \n" - "movss (%2, %0), %%xmm4 \n" - "movss 4(%2, %0), %%xmm5 \n" + "movss (%3, %1), %%xmm4 \n" + "movss (%4, %1), %%xmm5 \n" "shufps $0, %%xmm4, %%xmm4 \n" "shufps $0, %%xmm5, %%xmm5 \n" - "movaps %%xmm4, (%1, %0, 4) \n" - "movaps %%xmm5, 16(%1, %0, 4) \n" + "movaps %%xmm4, (%2, %0, 4) \n" + "movaps %%xmm5, 16(%2, %0, 4) \n" "jg 1b \n" - : "+&r"(j) - : "r"(matrix_simd), "r"(matrix) + : "+&r"(j), "+&r"(k) + : "r"(matrix_simd), "r"(matrix[0]), "r"(matrix[1]) : "memory" ); if (out_ch == 2) {