diff --git a/libavcodec/aac.h b/libavcodec/aac.h index da683b0071..89f838eab5 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -32,11 +32,6 @@ #include "aac_defines.h" -#include "libavutil/channel_layout.h" -#include "libavutil/mem_internal.h" -#include "sbr.h" - -#include #define MAX_CHANNELS 64 #define MAX_ELEM_ID 16 @@ -44,8 +39,6 @@ #define TNS_MAX_ORDER 20 #define MAX_LTP_LONG_SFB 40 -#define CLIP_AVOIDANCE_FACTOR 0.95f - enum RawDataBlockType { TYPE_SCE, TYPE_CPE, @@ -83,8 +76,6 @@ enum BandType { INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions (in phase). }; -#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10) - enum ChannelPosition { AAC_CHANNEL_OFF = 0, AAC_CHANNEL_FRONT = 1, @@ -94,15 +85,6 @@ enum ChannelPosition { AAC_CHANNEL_CC = 5, }; -/** - * The point during decoding at which channel coupling is applied. - */ -enum CouplingPoint { - BEFORE_TNS, - BETWEEN_TNS_AND_IMDCT, - AFTER_IMDCT = 3, -}; - /** * Predictor State */ @@ -131,54 +113,6 @@ typedef struct PredictorState { #define NOISE_PRE_BITS 9 ///< length of preamble #define NOISE_OFFSET 90 ///< subtracted from global gain, used as offset for the preamble -/** - * Long Term Prediction - */ -typedef struct LongTermPrediction { - int8_t present; - int16_t lag; - int coef_idx; - INTFLOAT coef; - int8_t used[MAX_LTP_LONG_SFB]; -} LongTermPrediction; - -/** - * Individual Channel Stream - */ -typedef struct IndividualChannelStream { - uint8_t max_sfb; ///< number of scalefactor bands per group - enum WindowSequence window_sequence[2]; - uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window. - int num_window_groups; - uint8_t group_len[8]; - LongTermPrediction ltp; - const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window - const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window - int num_swb; ///< number of scalefactor window bands - int num_windows; - int tns_max_bands; - int predictor_present; - int predictor_initialized; - int predictor_reset_group; - int predictor_reset_count[31]; ///< used by encoder to count prediction resets - uint8_t prediction_used[41]; - uint8_t window_clipping[8]; ///< set if a certain window is near clipping - float clip_avoidance_factor; ///< set if any window is near clipping to the necessary atennuation factor to avoid it -} IndividualChannelStream; - -/** - * Temporal Noise Shaping - */ -typedef struct TemporalNoiseShaping { - int present; - int n_filt[8]; - int length[8][4]; - int direction[8][4]; - int order[8][4]; - int coef_idx[8][4][TNS_MAX_ORDER]; - INTFLOAT coef[8][4][TNS_MAX_ORDER]; -} TemporalNoiseShaping; - typedef struct Pulse { int num_pulse; int start; @@ -186,63 +120,4 @@ typedef struct Pulse { int amp[4]; } Pulse; -/** - * coupling parameters - */ -typedef struct ChannelCoupling { - enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. - int num_coupled; ///< number of target elements - enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. - int id_select[8]; ///< element id - int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; - * [2] list of gains for left channel; [3] lists of gains for both channels - */ - INTFLOAT gain[16][120]; -} ChannelCoupling; - -/** - * Single Channel Element - used for both SCE and LFE elements. - */ -typedef struct SingleChannelElement { - IndividualChannelStream ics; - TemporalNoiseShaping tns; - Pulse pulse; - enum BandType band_type[128]; ///< band types - enum BandType band_alt[128]; ///< alternative band type (used by encoder) - int band_type_run_end[120]; ///< band type run end points - INTFLOAT sf[120]; ///< scalefactors - int sf_idx[128]; ///< scalefactor indices (used by encoder) - uint8_t zeroes[128]; ///< band is not coded (used by encoder) - uint8_t can_pns[128]; ///< band is allowed to PNS (informative) - float is_ener[128]; ///< Intensity stereo pos (used by encoder) - float pns_ener[128]; ///< Noise energy values (used by encoder) - DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine - DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed - DECLARE_ALIGNED(32, INTFLOAT, saved)[1536]; ///< overlap - DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer - DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP - DECLARE_ALIGNED(32, AAC_FLOAT, lcoeffs)[1024]; ///< MDCT of LTP coefficients (used by encoder) - DECLARE_ALIGNED(32, AAC_FLOAT, prcoeffs)[1024]; ///< Main prediction coefs (used by encoder) - PredictorState predictor_state[MAX_PREDICTORS]; - INTFLOAT *ret; ///< PCM output -} SingleChannelElement; - -/** - * channel element - generic struct for SCE/CPE/CCE/LFE - */ -typedef struct ChannelElement { - int present; - // CPE specific - int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. - int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder) - uint8_t is_mode; ///< Set if any bands have been encoded using intensity stereo (used by encoder) - uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band - uint8_t is_mask[128]; ///< Set if intensity stereo is used (used by encoder) - // shared - SingleChannelElement ch[2]; - // CCE specific - ChannelCoupling coup; - SpectralBandReplication sbr; -} ChannelElement; - #endif /* AVCODEC_AAC_H */ diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h index 37a318659e..e23310b5b1 100644 --- a/libavcodec/aacdec.h +++ b/libavcodec/aacdec.h @@ -30,13 +30,18 @@ #ifndef AVCODEC_AACDEC_H #define AVCODEC_AACDEC_H +#include + +#include "libavutil/channel_layout.h" #include "libavutil/float_dsp.h" #include "libavutil/fixed_dsp.h" #include "libavutil/mem_internal.h" #include "libavutil/tx.h" #include "aac.h" +#include "aac_defines.h" #include "mpeg4audio.h" +#include "sbr.h" /** * Output configuration status @@ -54,6 +59,103 @@ enum AACOutputChannelOrder { CHANNEL_ORDER_CODED, }; +/** + * The point during decoding at which channel coupling is applied. + */ +enum CouplingPoint { + BEFORE_TNS, + BETWEEN_TNS_AND_IMDCT, + AFTER_IMDCT = 3, +}; + +/** + * Long Term Prediction + */ +typedef struct LongTermPrediction { + int8_t present; + int16_t lag; + INTFLOAT coef; + int8_t used[MAX_LTP_LONG_SFB]; +} LongTermPrediction; + +/** + * Individual Channel Stream + */ +typedef struct IndividualChannelStream { + uint8_t max_sfb; ///< number of scalefactor bands per group + enum WindowSequence window_sequence[2]; + uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window. + int num_window_groups; + uint8_t group_len[8]; + LongTermPrediction ltp; + const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window + int num_swb; ///< number of scalefactor window bands + int num_windows; + int tns_max_bands; + int predictor_present; + int predictor_initialized; + int predictor_reset_group; + uint8_t prediction_used[41]; + uint8_t window_clipping[8]; ///< set if a certain window is near clipping +} IndividualChannelStream; + +/** + * Temporal Noise Shaping + */ +typedef struct TemporalNoiseShaping { + int present; + int n_filt[8]; + int length[8][4]; + int direction[8][4]; + int order[8][4]; + INTFLOAT coef[8][4][TNS_MAX_ORDER]; +} TemporalNoiseShaping; + +/** + * coupling parameters + */ +typedef struct ChannelCoupling { + enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied. + int num_coupled; ///< number of target elements + enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE. + int id_select[8]; ///< element id + int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel; + * [2] list of gains for left channel; [3] lists of gains for both channels + */ + INTFLOAT gain[16][120]; +} ChannelCoupling; + +/** + * Single Channel Element - used for both SCE and LFE elements. + */ +typedef struct SingleChannelElement { + IndividualChannelStream ics; + TemporalNoiseShaping tns; + enum BandType band_type[128]; ///< band types + int band_type_run_end[120]; ///< band type run end points + INTFLOAT sf[120]; ///< scalefactors + DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed + DECLARE_ALIGNED(32, INTFLOAT, saved)[1536]; ///< overlap + DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer + DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP + PredictorState predictor_state[MAX_PREDICTORS]; + INTFLOAT *ret; ///< PCM output +} SingleChannelElement; + +/** + * channel element - generic struct for SCE/CPE/CCE/LFE + */ +typedef struct ChannelElement { + int present; + // CPE specific + uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band + // shared + SingleChannelElement ch[2]; + // CCE specific + ChannelCoupling coup; + SpectralBandReplication sbr; +} ChannelElement; + typedef struct OutputConfiguration { MPEG4AudioConfig m4ac; uint8_t layout_map[MAX_ELEM_ID*4][3]; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 18b424736d..752f1c26b2 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -22,9 +22,12 @@ #ifndef AVCODEC_AACENC_H #define AVCODEC_AACENC_H +#include + #include "libavutil/channel_layout.h" #include "libavutil/float_dsp.h" #include "libavutil/mem_internal.h" +#include "libavutil/tx.h" #include "avcodec.h" #include "put_bits.h" @@ -35,6 +38,8 @@ #include "lpc.h" +#define CLIP_AVOIDANCE_FACTOR 0.95f + typedef enum AACCoder { AAC_CODER_ANMR = 0, AAC_CODER_TWOLOOP, @@ -54,6 +59,90 @@ typedef struct AACEncOptions { int intensity_stereo; } AACEncOptions; +/** + * Long Term Prediction + */ +typedef struct LongTermPrediction { + int8_t present; + int16_t lag; + int coef_idx; + float coef; + int8_t used[MAX_LTP_LONG_SFB]; +} LongTermPrediction; + +/** + * Individual Channel Stream + */ +typedef struct IndividualChannelStream { + uint8_t max_sfb; ///< number of scalefactor bands per group + enum WindowSequence window_sequence[2]; + uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window. + uint8_t group_len[8]; + LongTermPrediction ltp; + const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window + const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window + int num_swb; ///< number of scalefactor window bands + int num_windows; + int tns_max_bands; + int predictor_present; + int predictor_initialized; + int predictor_reset_group; + int predictor_reset_count[31]; ///< used to count prediction resets + uint8_t prediction_used[41]; + uint8_t window_clipping[8]; ///< set if a certain window is near clipping + float clip_avoidance_factor; ///< set if any window is near clipping to the necessary atennuation factor to avoid it +} IndividualChannelStream; + +/** + * Temporal Noise Shaping + */ +typedef struct TemporalNoiseShaping { + int present; + int n_filt[8]; + int length[8][4]; + int direction[8][4]; + int order[8][4]; + int coef_idx[8][4][TNS_MAX_ORDER]; + float coef[8][4][TNS_MAX_ORDER]; +} TemporalNoiseShaping; + +/** + * Single Channel Element - used for both SCE and LFE elements. + */ +typedef struct SingleChannelElement { + IndividualChannelStream ics; + TemporalNoiseShaping tns; + Pulse pulse; + enum BandType band_type[128]; ///< band types + enum BandType band_alt[128]; ///< alternative band type + int sf_idx[128]; ///< scalefactor indices + uint8_t zeroes[128]; ///< band is not coded + uint8_t can_pns[128]; ///< band is allowed to PNS (informative) + float is_ener[128]; ///< Intensity stereo pos + float pns_ener[128]; ///< Noise energy values + DECLARE_ALIGNED(32, float, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine + DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed + DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer + DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP + DECLARE_ALIGNED(32, float, lcoeffs)[1024]; ///< MDCT of LTP coefficients + DECLARE_ALIGNED(32, float, prcoeffs)[1024]; ///< Main prediction coefs + PredictorState predictor_state[MAX_PREDICTORS]; +} SingleChannelElement; + +/** + * channel element - generic struct for SCE/CPE/CCE/LFE + */ +typedef struct ChannelElement { + // CPE specific + int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. + int ms_mode; ///< Signals mid/side stereo flags coding mode + uint8_t is_mode; ///< Set if any bands have been encoded using intensity stereo + uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band + uint8_t is_mask[128]; ///< Set if intensity stereo is used + // shared + SingleChannelElement ch[2]; +} ChannelElement; + struct AACEncContext; typedef struct AACCoefficientsEncoder { diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h index bef4c103f3..ef2218e036 100644 --- a/libavcodec/aacenc_utils.h +++ b/libavcodec/aacenc_utils.h @@ -29,7 +29,7 @@ #define AVCODEC_AACENC_UTILS_H #include "libavutil/ffmath.h" -#include "aac.h" +#include "aacenc.h" #include "aacenctab.h" #include "aactab.h" diff --git a/libavcodec/aacenctab.h b/libavcodec/aacenctab.h index 20e47ea900..f2d6f597bc 100644 --- a/libavcodec/aacenctab.h +++ b/libavcodec/aacenctab.h @@ -30,6 +30,7 @@ #include "libavutil/channel_layout.h" #include "aac.h" +#include "defs.h" /** Total number of usable codebooks **/ #define CB_TOT 12 diff --git a/libavcodec/sbrdsp_template.c b/libavcodec/sbrdsp_template.c index 79cd2156d9..c1e583ea56 100644 --- a/libavcodec/sbrdsp_template.c +++ b/libavcodec/sbrdsp_template.c @@ -20,7 +20,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" #include "libavutil/attributes_internal.h" +#include "libavutil/mem_internal.h" static void sbr_sum64x5_c(INTFLOAT *z) {