diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index e3ca37ebdd..de58f3ab26 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -128,24 +128,39 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
     } while (end > ff_ac3_band_start_tab[band++]);
 }
 
-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
-                                       int nb_coefs)
+/**
+ * Count the occurrences of each bap value in a run of coefficients.
+ * mant_cnt is indexed by bap value and is only incremented, never cleared.
+ * A len of 0 or less leaves mant_cnt untouched.
+ */
+static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
+                                    int len)
 {
-    int bits, b, i;
+    // len-- > 0 visits bap[len-1] down to bap[0] and never reads bap[-1]
+    while (len-- > 0)
+        mant_cnt[bap[len]]++;
+}
 
-    bits = 0;
-    for (i = 0; i < nb_coefs; i++) {
-        b = bap[i];
-        if (b <= 4) {
-            // bap=1 to bap=4 will be counted in compute_mantissa_size_final
-            mant_cnt[b]++;
-        } else if (b <= 13) {
-            // bap=5 to bap=13 use (bap-1) bits
-            bits += b - 1;
-        } else {
-            // bap=14 uses 14 bits and bap=15 uses 16 bits
-            bits += (b == 14) ? 14 : 16;
-        }
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
+};
+
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+    int blk, bap;
+    int bits = 0;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        // bap=1 : 3 mantissas in 5 bits
+        bits += (mant_cnt[blk][1] / 3) * 5;
+        // bap=2 : 3 mantissas in 7 bits
+        // bap=4 : 2 mantissas in 7 bits
+        bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+        // bap=3 : 1 mantissa in 3 bits
+        bits += mant_cnt[blk][3] * 3;
+        // bap=5 to 15 : get bits per mantissa from table
+        for (bap = 5; bap < 16; bap++)
+            bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
     }
     return bits;
 }
@@ -181,6 +196,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
     c->ac3_rshift_int32 = ac3_rshift_int32_c;
     c->float_to_fixed24 = float_to_fixed24_c;
     c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
+    c->update_bap_counts = ac3_update_bap_counts_c;
     c->compute_mantissa_size = ac3_compute_mantissa_size_c;
     c->extract_exponents = ac3_extract_exponents_c;
 
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b750767e81..8eeafd68ac 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -24,6 +24,12 @@
 
 #include <stdint.h>
 
+/**
+ * Number of mantissa bits written for each bap value.
+ * bap values with fractional bits are set to 0 and are calculated separately.
+ */
+extern const uint16_t ff_ac3_bap_bits[16];
+
 typedef struct AC3DSPContext {
     /**
      * Set each encoded exponent in a block to the minimum of itself and the
@@ -102,9 +108,21 @@ typedef struct AC3DSPContext {
                                const uint8_t *bap_tab, uint8_t *bap);
 
     /**
-     * Calculate the number of bits needed to encode a set of mantissas.
+     * Update bap counts using the supplied array of bap.
+     *
+     * @param[out] mant_cnt   bap counts for 1 block
+     * @param[in]  bap        array of bap, pointing to start coef bin
+     * @param[in]  len        number of elements to process
      */
-    int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+    void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len);
+
+    /**
+     * Calculate the number of bits needed to encode a set of mantissas.
+     *
+     * @param[in] mant_cnt    bap counts for all blocks
+     * @return                mantissa bit count
+     */
+    int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);
 
     void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 } AC3DSPContext;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 6b9bd87853..66dfc29217 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1423,22 +1423,6 @@ static void count_frame_bits(AC3EncodeContext *s)
 }
 
 
-/**
- * Finalize the mantissa bit count by adding in the grouped mantissas.
- */
-static int compute_mantissa_size_final(int mant_cnt[5])
-{
-    // bap=1 : 3 mantissas in 5 bits
-    int bits = (mant_cnt[1] / 3) * 5;
-    // bap=2 : 3 mantissas in 7 bits
-    // bap=4 : 2 mantissas in 7 bits
-    bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7;
-    // bap=3 : each mantissa is 3 bits
-    bits += mant_cnt[3] * 3;
-    return bits;
-}
-
-
 /**
  * Calculate masking curve based on the final exponents.
  * Also calculate the power spectral densities to use in future calculations.
@@ -1491,38 +1475,60 @@ static void reset_block_bap(AC3EncodeContext *s)
 }
 
 
-static int count_mantissa_bits(AC3EncodeContext *s)
+/**
+ * Initialize mantissa counts.
+ * These are set so that they are padded to the next whole group size when bits
+ * are counted in compute_mantissa_size.
+ */
+static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16])
 {
-    int blk, ch;
-    int mantissa_bits;
-    int mant_cnt[5];
+    int blk;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk]));
+        mant_cnt[blk][1] = mant_cnt[blk][2] = 2;
+        mant_cnt[blk][4] = 1;
+    }
+}
+
+
+/**
+ * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth
+ * range.
+ */
+static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
+                                          uint16_t mant_cnt[AC3_MAX_BLOCKS][16],
+                                          int start, int end)
+{
+    int blk;
 
-    mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
         AC3Block *block = &s->blocks[blk];
-        int av_uninit(ch0);
-        int got_cpl = !block->cpl_in_use;
-        // initialize grouped mantissa counts. these are set so that they are
-        // padded to the next whole group size when bits are counted in
-        // compute_mantissa_size_final
-        mant_cnt[0] = mant_cnt[3] = 0;
-        mant_cnt[1] = mant_cnt[2] = 2;
-        mant_cnt[4] = 1;
-        for (ch = 1; ch <= s->channels; ch++) {
-            if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
-                ch0     = ch - 1;
-                ch      = CPL_CH;
-                got_cpl = 1;
-            }
-            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             s->ref_bap[ch][blk]+s->start_freq[ch],
-                                                             block->end_freq[ch]-s->start_freq[ch]);
-            if (ch == CPL_CH)
-                ch = ch0;
-        }
-        mantissa_bits += compute_mantissa_size_final(mant_cnt);
+        if (ch == CPL_CH && !block->cpl_in_use)
+            continue;
+        s->ac3dsp.update_bap_counts(mant_cnt[blk],
+                                    s->ref_bap[ch][blk] + start,
+                                    FFMIN(end, block->end_freq[ch]) - start);
     }
-    return mantissa_bits;
+}
+
+
+/**
+ * Count the number of mantissa bits in the frame based on the bap values.
+ */
+static int count_mantissa_bits(AC3EncodeContext *s)
+{
+    int ch, max_end_freq;
+    LOCAL_ALIGNED_16(uint16_t, mant_cnt,[AC3_MAX_BLOCKS][16]);
+
+    count_mantissa_bits_init(mant_cnt);
+
+    max_end_freq = s->bandwidth_code * 3 + 73;
+    for (ch = !s->cpl_enabled; ch <= s->channels; ch++)
+        count_mantissa_bits_update_ch(s, ch, mant_cnt, s->start_freq[ch],
+                                      max_end_freq);
+
+    return s->ac3dsp.compute_mantissa_size(mant_cnt);
 }
 
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
deleted file mode 100644
index d7d498e41f..0000000000
--- a/libavcodec/arm/ac3dsp_arm.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
-        push            {r4-r8,lr}
-        ldm             r0,  {r4-r8}
-        mov             r3,  r0
-        mov             r0,  #0
-1:
-        ldrb            lr,  [r1], #1
-        subs            r2,  r2,  #1
-        blt             2f
-        cmp             lr,  #4
-        bgt             3f
-        subs            lr,  lr,  #1
-        addlt           r4,  r4,  #1
-        addeq           r5,  r5,  #1
-        ble             1b
-        subs            lr,  lr,  #2
-        addlt           r6,  r6,  #1
-        addeq           r7,  r7,  #1
-        addgt           r8,  r8,  #1
-        b               1b
-3:
-        cmp             lr,  #14
-        sublt           lr,  lr,  #1
-        addgt           r0,  r0,  #16
-        addle           r0,  r0,  lr
-        b               1b
-2:
-        stm             r3,  {r4-r8}
-        pop             {r4-r8,pc}
-endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index fd78e1e6a4..4414dc8170 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
 
 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
-    c->compute_mantissa_size = ff_ac3_compute_mantissa_size_arm;
-
     if (HAVE_ARMV6) {
         c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6;
     }
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 18f9dc3894..0d8f4b78eb 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,11 @@ SECTION_RODATA
 ; 16777216.0f - used in ff_float_to_fixed24()
 pf_1_24: times 4 dd 0x4B800000
 
+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
 %endif
     ja .loop
     REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp
+    movhlps  %2, %1
+    paddd    %1, %2
+    pshufd   %2, %1, 0x1
+    paddd    %1, %2
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
+    movdqa      m0, [mant_cntq      ]
+    movdqa      m1, [mant_cntq+ 1*16]
+    paddw       m0, [mant_cntq+ 2*16]
+    paddw       m1, [mant_cntq+ 3*16]
+    paddw       m0, [mant_cntq+ 4*16]
+    paddw       m1, [mant_cntq+ 5*16]
+    paddw       m0, [mant_cntq+ 6*16]
+    paddw       m1, [mant_cntq+ 7*16]
+    paddw       m0, [mant_cntq+ 8*16]
+    paddw       m1, [mant_cntq+ 9*16]
+    paddw       m0, [mant_cntq+10*16]
+    paddw       m1, [mant_cntq+11*16]
+    pmaddwd     m0, [ff_ac3_bap_bits   ]
+    pmaddwd     m1, [ff_ac3_bap_bits+16]
+    paddd       m0, m1
+    PHADDD4     m0, m1
+    movd      sumd, m0
+    movdqa      m3, [pw_bap_mul1]
+    movhpd      m0, [mant_cntq     +2]
+    movlpd      m0, [mant_cntq+1*32+2]
+    movhpd      m1, [mant_cntq+2*32+2]
+    movlpd      m1, [mant_cntq+3*32+2]
+    movhpd      m2, [mant_cntq+4*32+2]
+    movlpd      m2, [mant_cntq+5*32+2]
+    pmulhuw     m0, m3
+    pmulhuw     m1, m3
+    pmulhuw     m2, m3
+    paddusw     m0, m1
+    paddusw     m0, m2
+    pmaddwd     m0, [pw_bap_mul2]
+    PHADDD4     m0, m1
+    movd       eax, m0
+    add        eax, sumd
+    RET
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 475042395c..2664736bb6 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
 extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
 extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
 
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
 {
     int mm_flags = av_get_cpu_flags();
@@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
         c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;