/* ffmpeg/libavcodec/aac/aacdec_usac.c (1791 lines, 59 KiB, C) */

/*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "aacdec_usac.h"
#include "aacdec_tab.h"
#include "aacdec_lpd.h"
#include "aacdec_ac.h"
#include "libavcodec/aacsbr.h"
#include "libavcodec/aactab.h"
#include "libavutil/mem.h"
#include "libavcodec/mpeg4audio.h"
#include "libavcodec/unary.h"
/* Number of scalefactor bands per complex prediction band, equal to 2. */
#define SFB_PER_PRED_BAND 2
/**
 * Read an escaped unsigned value built from up to three chained bit fields.
 *
 * The first field is nb1 bits wide; unless it is all-ones it is the result.
 * Otherwise a second field of nb2 bits is read and added to it. If nb3 is
 * non-zero and the second field is all-ones as well, a third field of
 * nb3 bits is read and added too.
 *
 * @param gb  bit reader to consume from
 * @param nb1 width in bits of the first field
 * @param nb2 width in bits of the second (escape) field
 * @param nb3 width in bits of the third field, or 0 if there is none
 * @return the sum of all fields that were read
 */
static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
{
    uint32_t total = get_bits(gb, nb1);
    uint32_t second;

    /* Anything below the all-ones pattern is a plain, unescaped value. */
    if (total < ((1 << nb1) - 1))
        return total;

    second = get_bits(gb, nb2);
    total += second;

    if (nb3 != 0 && second == ((1 << nb2) - 1))
        total += get_bits(gb, nb3);

    return total;
}
/* ISO/IEC 23003-3, Table 74 — bsOutputChannelPos
 *
 * Lookup table from a bsOutputChannelPos value to the AVChannel identifier
 * assigned to that channel. Only indices 0..31 are listed explicitly; the
 * remaining entries of the 64-element array are left to C's implicit
 * zero-initialization. */
static const enum AVChannel usac_ch_pos_to_av[64] = {
[0] = AV_CHAN_FRONT_LEFT,
[1] = AV_CHAN_FRONT_RIGHT,
[2] = AV_CHAN_FRONT_CENTER,
[3] = AV_CHAN_LOW_FREQUENCY,
[4] = AV_CHAN_SIDE_LEFT, // +110 degrees, Ls|LS|kAudioChannelLabel_LeftSurround
[5] = AV_CHAN_SIDE_RIGHT, // -110 degrees, Rs|RS|kAudioChannelLabel_RightSurround
[6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
[7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
[8] = AV_CHAN_BACK_LEFT, // +135 degrees, Lsr|BL|kAudioChannelLabel_RearSurroundLeft
[9] = AV_CHAN_BACK_RIGHT, // -135 degrees, Rsr|BR|kAudioChannelLabel_RearSurroundRight
[10] = AV_CHAN_BACK_CENTER,
[11] = AV_CHAN_SURROUND_DIRECT_LEFT,
[12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
[13] = AV_CHAN_SIDE_SURROUND_LEFT, // +90 degrees, Lss|SL|kAudioChannelLabel_LeftSideSurround
[14] = AV_CHAN_SIDE_SURROUND_RIGHT, // -90 degrees, Rss|SR|kAudioChannelLabel_RightSideSurround
[15] = AV_CHAN_WIDE_LEFT, // +60 degrees, Lw|FLw|kAudioChannelLabel_LeftWide
[16] = AV_CHAN_WIDE_RIGHT, // -60 degrees, Rw|FRw|kAudioChannelLabel_RightWide
[17] = AV_CHAN_TOP_FRONT_LEFT,
[18] = AV_CHAN_TOP_FRONT_RIGHT,
[19] = AV_CHAN_TOP_FRONT_CENTER,
[20] = AV_CHAN_TOP_BACK_LEFT,
[21] = AV_CHAN_TOP_BACK_RIGHT,
[22] = AV_CHAN_TOP_BACK_CENTER,
[23] = AV_CHAN_TOP_SIDE_LEFT,
[24] = AV_CHAN_TOP_SIDE_RIGHT,
[25] = AV_CHAN_TOP_CENTER,
[26] = AV_CHAN_LOW_FREQUENCY_2,
[27] = AV_CHAN_BOTTOM_FRONT_LEFT,
[28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
[29] = AV_CHAN_BOTTOM_FRONT_CENTER,
[30] = AV_CHAN_TOP_SURROUND_LEFT, // +110 degrees, Lvs, TpLS
[31] = AV_CHAN_TOP_SURROUND_RIGHT, // -110 degrees, Rvs, TpRS
};
/**
 * Parse a single loudness info entry from the bitstream into info.
 *
 * Reads the DRC set and downmix identifiers, the optional sample-peak and
 * true-peak levels, and then a 4-bit count of loudness measurements followed
 * by that many measurement records.
 *
 * @param ac   decoder context (not used by this function)
 * @param info destination for the parsed fields
 * @param gb   bit reader to consume from
 * @return always 0
 */
static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info,
                                GetBitContext *gb)
{
    int idx = 0;

    info->drc_set_id = get_bits(gb, 6);
    info->downmix_id = get_bits(gb, 7);

    /* samplePeakLevelPresent */
    info->sample_peak.present = get_bits1(gb);
    if (info->sample_peak.present)
        info->sample_peak.lvl = get_bits(gb, 12);

    /* truePeakLevelPresent */
    info->true_peak.present = get_bits1(gb);
    if (info->true_peak.present) {
        info->true_peak.lvl         = get_bits(gb, 12);
        info->true_peak.measurement = get_bits(gb, 4);
        info->true_peak.reliability = get_bits(gb, 2);
    }

    info->nb_measurements = get_bits(gb, 4);
    while (idx < info->nb_measurements) {
        info->measurements[idx].method_def  = get_bits(gb, 4);
        info->measurements[idx].method_val  = get_unary(gb, 0, 8);
        info->measurements[idx].measurement = get_bits(gb, 4);
        info->measurements[idx].reliability = get_bits(gb, 2);
        idx++;
    }

    return 0;
}
/**
 * Parse a loudness info set into usac->loudness.
 *
 * Reads the album and regular loudness-info counts (6 bits each), decodes
 * that many entries of each kind, and then walks the optional list of set
 * extensions until the terminator type is found. Extensions of type
 * UNIDRCLOUDEXT_EQ are reported as unsupported; every other extension is
 * skipped bit by bit.
 *
 * @param ac   decoder context, used for logging
 * @param usac configuration whose loudness member is filled in
 * @param gb   bit reader to consume from
 * @return 0 on success, AVERROR_PATCHWELCOME for an unsupported extension,
 *         or a negative error code from decode_loudness_info()
 */
static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac,
                               GetBitContext *gb)
{
    int ret;

    usac->loudness.nb_album = get_bits(gb, 6); /* loudnessInfoAlbumCount */
    usac->loudness.nb_info  = get_bits(gb, 6); /* loudnessInfoCount */

    for (int i = 0; i < usac->loudness.nb_album; i++) {
        ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb);
        if (ret < 0)
            return ret;
    }

    for (int i = 0; i < usac->loudness.nb_info; i++) {
        ret = decode_loudness_info(ac, &usac->loudness.info[i], gb);
        if (ret < 0)
            return ret;
    }

    if (get_bits1(gb)) { /* loudnessInfoSetExtPresent */
        enum AACUSACLoudnessExt type;
        while ((type = get_bits(gb, 4)) != UNIDRCLOUDEXT_TERM) {
            /* The size field is 4..19 bits wide, so the payload length can
             * be as large as 2^19 bits. It must not be stored in an 8-bit
             * variable, otherwise it is truncated modulo 256 and too few
             * bits are skipped, desynchronising the reader. */
            const int      size_bits = get_bits(gb, 4) + 4;
            const uint32_t bit_size  = get_bits(gb, size_bits) + 1;

            switch (type) {
            case UNIDRCLOUDEXT_EQ:
                avpriv_report_missing_feature(ac->avctx, "loudnessInfoV1");
                return AVERROR_PATCHWELCOME;
            default:
                for (uint32_t i = 0; i < bit_size; i++)
                    skip_bits1(gb);
                break;
            }
        }
    }

    return 0;
}
/**
 * Parse the SBR configuration of one element into e->sbr.
 *
 * The three eSBR tool flags are read first; if any of them is set the
 * stream is rejected as unsupported. Afterwards the default SBR header is
 * read: start/stop frequency, then two "extra" flags that select whether
 * the remaining fields are transmitted or take their fixed default values.
 *
 * @param ac decoder context, used for logging
 * @param e  element configuration to fill in
 * @param gb bit reader to consume from
 * @return 0 on success, AVERROR_PATCHWELCOME if an eSBR tool is signalled
 */
static int decode_usac_sbr_data(AACDecContext *ac,
                                AACUsacElemConfig *e, GetBitContext *gb)
{
    int have_extra1, have_extra2;

    e->sbr.harmonic_sbr = get_bits1(gb); /* harmonicSBR */
    e->sbr.bs_intertes  = get_bits1(gb); /* bs_interTes */
    e->sbr.bs_pvc       = get_bits1(gb); /* bs_pvc */
    if (e->sbr.harmonic_sbr || e->sbr.bs_intertes || e->sbr.bs_pvc) {
        avpriv_report_missing_feature(ac->avctx, "AAC USAC eSBR");
        return AVERROR_PATCHWELCOME;
    }

    e->sbr.dflt.start_freq = get_bits(gb, 4); /* dflt_start_freq */
    e->sbr.dflt.stop_freq  = get_bits(gb, 4); /* dflt_stop_freq */

    have_extra1 = get_bits1(gb); /* dflt_header_extra1 */
    have_extra2 = get_bits1(gb); /* dflt_header_extra2 */

    if (have_extra1) {
        e->sbr.dflt.freq_scale  = get_bits(gb, 2); /* dflt_freq_scale */
        e->sbr.dflt.alter_scale = get_bits1(gb);   /* dflt_alter_scale */
        e->sbr.dflt.noise_bands = get_bits(gb, 2); /* dflt_noise_bands */
    } else {
        /* Values used when the first extra header is absent */
        e->sbr.dflt.freq_scale  = 2;
        e->sbr.dflt.alter_scale = 1;
        e->sbr.dflt.noise_bands = 2;
    }

    if (have_extra2) {
        e->sbr.dflt.limiter_bands  = get_bits(gb, 2); /* dflt_limiter_bands */
        e->sbr.dflt.limiter_gains  = get_bits(gb, 2); /* dflt_limiter_gains */
        e->sbr.dflt.interpol_freq  = get_bits1(gb);   /* dflt_interpol_freq */
        e->sbr.dflt.smoothing_mode = get_bits1(gb);   /* dflt_smoothing_mode */
    } else {
        /* Values used when the second extra header is absent */
        e->sbr.dflt.limiter_bands  = 2;
        e->sbr.dflt.limiter_gains  = 2;
        e->sbr.dflt.interpol_freq  = 1;
        e->sbr.dflt.smoothing_mode = 1;
    }

    return 0;
}
/**
 * Parse the core coder configuration shared by SCE and CPE elements.
 *
 * Reads the time-warped MDCT flag and the noise filling flag (one bit each,
 * in that order) and records the stream-wide SBR ratio in the element.
 *
 * @param e         element configuration to fill in
 * @param gb        bit reader to consume from
 * @param sbr_ratio SBR ratio value to store in e->sbr.ratio
 */
static void decode_usac_element_core(AACUsacElemConfig *e,
                                     GetBitContext *gb,
                                     int sbr_ratio)
{
    e->sbr.ratio  = sbr_ratio;
    e->tw_mdct    = get_bits1(gb); /* tw_mdct */
    e->noise_fill = get_bits1(gb); /* noiseFilling */
}
/**
 * Parse the channel-pair specific part of an element configuration.
 *
 * When SBR is in use (e->sbr.ratio is non-zero) the SBR configuration and
 * the 2-bit stereo config index are read; otherwise the index is 0. A
 * non-zero index is followed by the MPEG Surround 2-1-2 configuration,
 * which is stored in e->mps.
 *
 * @param ac decoder context, passed on for logging
 * @param e  element configuration to fill in
 * @param gb bit reader to consume from
 * @return 0 on success, or a negative error code from decode_usac_sbr_data()
 */
static int decode_usac_element_pair(AACDecContext *ac,
                                    AACUsacElemConfig *e, GetBitContext *gb)
{
    e->stereo_config_index = 0;

    if (e->sbr.ratio) {
        const int err = decode_usac_sbr_data(ac, e, gb);
        if (err < 0)
            return err;
        e->stereo_config_index = get_bits(gb, 2);
    }

    /* No MPEG Surround configuration is transmitted for index 0 */
    if (!e->stereo_config_index)
        return 0;

    e->mps.freq_res          = get_bits(gb, 3); /* bsFreqRes */
    e->mps.fixed_gain        = get_bits(gb, 3); /* bsFixedGainDMX */
    e->mps.temp_shape_config = get_bits(gb, 2); /* bsTempShapeConfig */
    e->mps.decorr_config     = get_bits(gb, 2); /* bsDecorrConfig */
    e->mps.high_rate_mode    = get_bits1(gb);   /* bsHighRateMode */
    e->mps.phase_coding      = get_bits1(gb);   /* bsPhaseCoding */

    /* bsOttBandsPhasePresent */
    if (get_bits1(gb))
        e->mps.otts_bands_phase = get_bits(gb, 5); /* bsOttBandsPhase */

    /* bsResidualCoding is implied by the stereo config index */
    e->mps.residual_coding = e->stereo_config_index >= 2;
    if (e->mps.residual_coding) {
        e->mps.residual_bands = get_bits(gb, 5); /* bsResidualBands */
        e->mps.pseudo_lr      = get_bits1(gb);   /* bsPseudoLr */
    }

    if (e->mps.temp_shape_config == 2)
        e->mps.env_quant_mode = get_bits1(gb); /* bsEnvQuantMode */

    return 0;
}
/**
 * Parse the configuration of an extension element.
 *
 * Stores the extension type, the optional default payload length and the
 * payload fragmentation flag in e->ext. The extension's own configuration
 * payload is not interpreted: for ID_EXT_ELE_FILL nothing further is read,
 * for every other type (audio pre-roll included) the announced number of
 * configuration bytes is skipped. The MPEGS, SAOC and UNI_DRC types have no
 * dedicated handling and are skipped like any other type.
 *
 * @param ac decoder context, used for logging
 * @param e  element configuration to fill in
 * @param gb bit reader to consume from
 * @return always 0
 */
static int decode_usac_extension(AACDecContext *ac, AACUsacElemConfig *e,
                                 GetBitContext *gb)
{
    int cfg_bytes;
    int default_len = 0;

    e->ext.type = get_escaped_value(gb, 4, 8, 16); /* usacExtElementType */
    cfg_bytes   = get_escaped_value(gb, 4, 8, 16); /* usacExtElementConfigLength */

    /* usacExtElementDefaultLengthPresent */
    if (get_bits1(gb))
        default_len = get_escaped_value(gb, 8, 16, 0) + 1;
    e->ext.default_len  = default_len;

    e->ext.payload_frag = get_bits1(gb); /* usacExtElementPayloadFrag */

    av_log(ac->avctx, AV_LOG_DEBUG, "Extension present: type %i, len %i\n",
           e->ext.type, cfg_bytes);

    /* Fill elements are left alone, which is what the spec does; everything
     * else has its configuration bytes skipped (for audio pre-roll the
     * length should be 0 anyway). */
    if (e->ext.type != ID_EXT_ELE_FILL)
        skip_bits(gb, 8*cfg_bytes);

    return 0;
}
/**
 * Reset the per-element USAC decoding state of every configured element.
 *
 * For each SCE, CPE and LFE element in oc->usac the matching channel element
 * is looked up; its stereo state is cleared, SBR is reconfigured when the
 * element uses it, and the USAC data of each of its channels is cleared.
 * The noise filling seed is then set to 0x3039 for the first channel of the
 * element and 0x10932 for the second. Extension elements carry no channel
 * state and are skipped.
 *
 * @param ac decoder context holding the channel elements
 * @param oc output configuration whose USAC element list is walked
 * @return always 0
 */
int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc)
{
    AACUSACConfig *usac = &oc->usac;
    int elem_id[3 /* SCE, CPE, LFE */] = { 0, 0, 0 };

    for (int i = 0; i < usac->nb_elems; i++) {
        AACUsacElemConfig *e = &usac->elems[i];
        ChannelElement *che;
        enum RawDataBlockType type;
        int id, ch;

        switch (e->type) {
        case ID_USAC_SCE:
            ch = 1;
            type = TYPE_SCE;
            id = elem_id[0]++;
            break;
        case ID_USAC_CPE:
            ch = 2;
            type = TYPE_CPE;
            id = elem_id[1]++;
            break;
        case ID_USAC_LFE:
            ch = 1;
            type = TYPE_LFE;
            id = elem_id[2]++;
            break;
        default: /* ID_USAC_EXT: nothing to reset */
            continue;
        }

        che = ff_aac_get_che(ac, type, id);
        if (!che)
            continue;

        memset(&che->us, 0, sizeof(che->us));

        if (e->sbr.ratio)
            ff_aac_sbr_config_usac(ac, che, e);

        /* Index by the loop counter, not by the channel count: using the
         * count would touch ch[2] for a pair element and would never reset
         * ch[0] at all. */
        for (int j = 0; j < ch; j++) {
            AACUsacElemData *ue = &che->ch[j].ue;

            memset(ue, 0, sizeof(*ue));
            ue->noise.seed = j ? 0x10932 : 0x3039;
        }
    }

    return 0;
}
/**
 * Parse a UsacConfig() and configure the decoder accordingly.
 *
 * The function clears oc->usac and then reads, in order: the sampling
 * frequency, the core/SBR frame length index (which also selects the SBR
 * ratio), the channel configuration (either an explicit channel list or a
 * default layout), the list of decoder elements, and the optional
 * configuration extensions. Sample rate fields of avctx and oc->m4ac are
 * filled in, the output layout is configured through
 * ff_aac_output_configure(), the codec profile is set to USAC and the
 * per-element decoding state is reset.
 *
 * @param ac             decoder context; must not be NULL
 * @param avctx          codec context receiving sample rate and layout
 * @param gb             bit reader positioned at the start of the config
 * @param oc             output configuration to fill in
 * @param channel_config not used by this function
 * @return 0 on success, a negative AVERROR code otherwise. On element or
 *         layout errors usac->nb_elems is reset to 0 before returning.
 */
int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
                              GetBitContext *gb, OutputConfiguration *oc,
                              int channel_config)
{
    int ret;
    uint8_t freq_idx;
    uint8_t channel_config_idx;
    int nb_channels = 0;
    int ratio_mult, ratio_dec;
    int samplerate;
    int sbr_ratio;
    MPEG4AudioConfig *m4ac = &oc->m4ac;
    AACUSACConfig *usac = &oc->usac;
    int elem_id[3 /* SCE, CPE, LFE */];

    int map_pos_set = 0;
    uint8_t layout_map[MAX_ELEM_ID*4][3] = { 0 };

    if (!ac)
        return AVERROR_PATCHWELCOME;

    memset(usac, 0, sizeof(*usac));

    freq_idx = get_bits(gb, 5); /* usacSamplingFrequencyIndex */
    if (freq_idx == 0x1f) {
        /* Escape value: the rate is transmitted explicitly */
        samplerate = get_bits(gb, 24); /* usacSamplingFrequency */
    } else {
        samplerate = ff_aac_usac_samplerate[freq_idx];
        if (samplerate < 0)
            return AVERROR(EINVAL);
    }

    usac->core_sbr_frame_len_idx = get_bits(gb, 3); /* coreSbrFrameLengthIndex */
    m4ac->frame_length_short = usac->core_sbr_frame_len_idx == 0 ||
                               usac->core_sbr_frame_len_idx == 2;

    usac->core_frame_len = (usac->core_sbr_frame_len_idx == 0 ||
                            usac->core_sbr_frame_len_idx == 2) ? 768 : 1024;

    /* SBR ratio index: 0 = no SBR, 1 = 4:1, 2 = 8:3, 3 = 2:1 */
    sbr_ratio = usac->core_sbr_frame_len_idx == 2 ? 2 :
                usac->core_sbr_frame_len_idx == 3 ? 3 :
                usac->core_sbr_frame_len_idx == 4 ? 1 :
                                                    0;

    /* Ratio between the output rate and the core coder rate. The 4:1 case
     * is signalled by ratio index 1; testing for the value 4 would never
     * match since the index only ever takes the values 0..3. */
    switch (sbr_ratio) {
    case 2: /* 8:3 */
        ratio_mult = 8;
        ratio_dec = 3;
        break;
    case 3: /* 2:1 */
        ratio_mult = 2;
        ratio_dec = 1;
        break;
    case 1: /* 4:1 */
        ratio_mult = 4;
        ratio_dec = 1;
        break;
    default: /* no SBR */
        ratio_mult = 1;
        ratio_dec = 1;
        break;
    }

    avctx->sample_rate = samplerate;
    m4ac->ext_sample_rate = samplerate;
    m4ac->sample_rate = (samplerate * ratio_dec) / ratio_mult;

    m4ac->sampling_index = ff_aac_sample_rate_idx(m4ac->sample_rate);
    m4ac->sbr = sbr_ratio > 0;

    channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex */
    if (!channel_config_idx) {
        /* UsacChannelConfig(): explicit list of output channel positions */
        nb_channels = get_escaped_value(gb, 5, 8, 16); /* numOutChannels */
        if (nb_channels > 64)
            return AVERROR(EINVAL);

        av_channel_layout_uninit(&ac->oc[1].ch_layout);

        ret = av_channel_layout_custom_init(&ac->oc[1].ch_layout, nb_channels);
        if (ret < 0)
            return ret;

        for (int i = 0; i < nb_channels; i++) {
            AVChannelCustom *cm = &ac->oc[1].ch_layout.u.map[i];
            cm->id = usac_ch_pos_to_av[get_bits(gb, 5)]; /* bsOutputChannelPos */
        }

        ret = av_channel_layout_retype(&ac->oc[1].ch_layout,
                                       AV_CHANNEL_ORDER_NATIVE,
                                       AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
        if (ret < 0)
            return ret;

        ret = av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
        if (ret < 0)
            return ret;
    } else {
        int nb_elements;
        if ((ret = ff_aac_set_default_channel_config(ac, avctx, layout_map,
                                                     &nb_elements, channel_config_idx)))
            return ret;

        /* Fill in the number of expected channels */
        for (int i = 0; i < nb_elements; i++)
            nb_channels += layout_map[i][0] == TYPE_CPE ? 2 : 1;

        /* Positions come from the default layout; do not overwrite them */
        map_pos_set = 1;
    }

    /* UsacDecoderConfig */
    elem_id[0] = elem_id[1] = elem_id[2] = 0;
    usac->nb_elems = get_escaped_value(gb, 4, 8, 16) + 1;
    if (usac->nb_elems > 64) {
        av_log(ac->avctx, AV_LOG_ERROR, "Too many elements: %i\n",
               usac->nb_elems);
        usac->nb_elems = 0;
        return AVERROR(EINVAL);
    }

    for (int i = 0; i < usac->nb_elems; i++) {
        /* Number of channel elements (SCE + CPE + LFE) seen so far; also
         * the next free slot in layout_map. */
        int map_count = elem_id[0] + elem_id[1] + elem_id[2];
        AACUsacElemConfig *e = &usac->elems[i];
        memset(e, 0, sizeof(*e));

        e->type = get_bits(gb, 2); /* usacElementType */
        if (e->type != ID_USAC_EXT && (map_count + 1) > nb_channels) {
            av_log(ac->avctx, AV_LOG_ERROR, "Too many channels for the channel "
                                            "configuration\n");
            usac->nb_elems = 0;
            return AVERROR(EINVAL);
        }

        av_log(ac->avctx, AV_LOG_DEBUG, "Element present: idx %i, type %i\n",
               i, e->type);

        switch (e->type) {
        case ID_USAC_SCE: /* SCE */
            /* UsacCoreConfig */
            decode_usac_element_core(e, gb, sbr_ratio);
            if (e->sbr.ratio > 0) {
                ret = decode_usac_sbr_data(ac, e, gb);
                if (ret < 0)
                    return ret;
            }
            layout_map[map_count][0] = TYPE_SCE;
            layout_map[map_count][1] = elem_id[0]++;
            if (!map_pos_set)
                layout_map[map_count][2] = AAC_CHANNEL_FRONT;

            break;
        case ID_USAC_CPE: /* UsacChannelPairElementConf */
            /* UsacCoreConfig */
            decode_usac_element_core(e, gb, sbr_ratio);
            ret = decode_usac_element_pair(ac, e, gb);
            if (ret < 0)
                return ret;
            layout_map[map_count][0] = TYPE_CPE;
            layout_map[map_count][1] = elem_id[1]++;
            if (!map_pos_set)
                layout_map[map_count][2] = AAC_CHANNEL_FRONT;

            break;
        case ID_USAC_LFE: /* LFE */
            /* LFE has no need for any configuration */
            e->tw_mdct = 0;
            e->noise_fill = 0;
            layout_map[map_count][0] = TYPE_LFE;
            layout_map[map_count][1] = elem_id[2]++;
            if (!map_pos_set)
                layout_map[map_count][2] = AAC_CHANNEL_LFE;

            break;
        case ID_USAC_EXT: /* EXT */
            ret = decode_usac_extension(ac, e, gb);
            if (ret < 0)
                return ret;
            break;
        };
    }

    ret = ff_aac_output_configure(ac, layout_map, elem_id[0] + elem_id[1] + elem_id[2],
                                  OC_GLOBAL_HDR, 0);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Unable to parse channel config!\n");
        usac->nb_elems = 0;
        return ret;
    }

    if (get_bits1(gb)) { /* usacConfigExtensionPresent */
        int invalid;
        int nb_extensions = get_escaped_value(gb, 2, 4, 8) + 1; /* numConfigExtensions */
        for (int i = 0; i < nb_extensions; i++) {
            int type = get_escaped_value(gb, 4, 8, 16);
            int len = get_escaped_value(gb, 4, 8, 16);
            switch (type) {
            case ID_CONFIG_EXT_LOUDNESS_INFO:
                ret = decode_loudness_set(ac, usac, gb);
                if (ret < 0)
                    return ret;
                break;
            case ID_CONFIG_EXT_STREAM_ID:
                usac->stream_identifier = get_bits(gb, 16);
                break;
            case ID_CONFIG_EXT_FILL:
                /* Fill bytes are expected to be 0xA5; count the others */
                invalid = 0;
                while (len--) {
                    if (get_bits(gb, 8) != 0xA5)
                        invalid++;
                }
                if (invalid)
                    av_log(avctx, AV_LOG_WARNING, "Invalid fill bytes: %i\n",
                           invalid);
                break;
            default:
                /* Unknown extension: skip its payload byte by byte */
                while (len--)
                    skip_bits(gb, 8);
                break;
            }
        }
    }

    ac->avctx->profile = AV_PROFILE_AAC_USAC;

    ret = ff_aac_usac_reset_state(ac, oc);
    if (ret < 0)
        return ret;

    return 0;
}
/**
 * Decode the scalefactor offsets of one channel into sce->sfo.
 *
 * The very first band takes the global gain directly; every following band
 * is coded as a difference to the previous one. All values are stored with
 * 100 subtracted, which is the canonical representation of quantized
 * scalefactors in the spec.
 *
 * @param ac          decoder context, used for logging
 * @param sce         channel whose ics describes the band layout and whose
 *                    sfo array receives the result
 * @param gb          bit reader to consume from
 * @param global_gain starting value of the running scalefactor
 * @return 0 on success, AVERROR_INVALIDDATA if a scalefactor leaves the
 *         range 0..255
 */
static int decode_usac_scale_factors(AACDecContext *ac,
                                     SingleChannelElement *sce,
                                     GetBitContext *gb, uint8_t global_gain)
{
    IndividualChannelStream *ics = &sce->ics;
    int running_sf = global_gain;

    for (int g = 0; g < ics->num_window_groups; g++) {
        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
            if (g == 0 && sfb == 0) {
                /* Nothing is transmitted for the first band */
                sce->sfo[0] = running_sf - 100;
                continue;
            }

            running_sf += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO;
            if (running_sf < 0 || running_sf > 255) {
                av_log(ac->avctx, AV_LOG_ERROR,
                       "Scalefactor (%d) out of range.\n", running_sf);
                return AVERROR_INVALIDDATA;
            }

            sce->sfo[g*ics->max_sfb + sfb] = running_sf - 100;
        }
    }

    return 0;
}
/**
 * Decode and dequantize arithmetically coded, uniformly quantized spectral
 * values.
 *
 * Coefficients are decoded in pairs: len/2 pairs are read (fewer if the
 * stop condition is met), every remaining pair up to N/2 is zeroed, and
 * finally one sign bit is read for each non-zero value among the first len
 * coefficients.
 *
 * @param s     decoder context (not used by this function)
 * @param coef  output array receiving the dequantized, signed coefficients
 * @param gb    bit reader; left positioned after the sign bits
 * @param state arithmetic coder context state, carried across calls
 * @param reset passed on to ff_aac_ac_map_process()
 * @param len   number of coefficients that are coded in the bitstream
 * @param N     total number of coefficients in the window
 *
 * @return 0 on success, AVERROR(EINVAL) if more than 23 escape levels are
 *         signalled for a single pair
 */
static int decode_spectrum_ac(AACDecContext *s, float coef[1024],
GetBitContext *gb, AACArithState *state,
int reset, uint16_t len, uint16_t N)
{
AACArith ac;
int i, a, b;
uint32_t c;
int gb_count;
GetBitContext gb2;
c = ff_aac_ac_map_process(state, reset, N);
/* Nothing coded: only finalize the context state, coef is left untouched */
if (!len) {
ff_aac_ac_finish(state, 0, N);
return 0;
}
ff_aac_ac_init(&ac, gb);
/* Backup reader for rolling back by 14 bits at the end */
gb2 = *gb;
gb_count = get_bits_count(&gb2);
for (i = 0; i < len/2; i++) {
/* MSB */
int lvl, esc_nb, m;
c = ff_aac_ac_get_context(state, c, i, N);
/* Each escape symbol adds one LSB plane (lvl); the escape count used for
 * the probability model lookup saturates at 7. */
for (lvl=esc_nb=0;;) {
uint32_t pki = ff_aac_ac_get_pk(c + (esc_nb << 17));
m = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_msb_cdfs[pki],
FF_ARRAY_ELEMS(ff_aac_ac_msb_cdfs[pki]));
if (m < FF_AAC_AC_ESCAPE)
break;
lvl++;
/* Cargo-culted value. */
if (lvl > 23)
return AVERROR(EINVAL);
if ((esc_nb = lvl) > 7)
esc_nb = 7;
}
/* The MSB symbol packs both values of the pair: a in the low 2 bits,
 * b in the bits above. */
b = m >> 2;
a = m - (b << 2);
/* ARITH_STOP detection */
if (!m) {
if (esc_nb)
break;
a = b = 0;
}
/* LSB */
for (int l = lvl; l > 0; l--) {
int lsbidx = !a ? 1 : (!b ? 0 : 2);
uint8_t r = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_lsb_cdfs[lsbidx],
FF_ARRAY_ELEMS(ff_aac_ac_lsb_cdfs[lsbidx]));
a = (a << 1) | (r & 1);
b = (b << 1) | ((r >> 1) & 1);
}
/* Dequantize coeffs here: x * cbrt(x), i.e. x^(4/3) */
coef[2*i + 0] = a * cbrt(a);
coef[2*i + 1] = b * cbrt(b);
ff_aac_ac_update_context(state, i, a, b);
}
/* Decoding ran on the backup reader gb2; bring gb up to date now. */
if (len > 1) {
/* "Rewind" bitstream back by 14 bits */
int gb_count2 = get_bits_count(&gb2);
skip_bits(gb, gb_count2 - gb_count - 14);
} else {
*gb = gb2;
}
ff_aac_ac_finish(state, i, N);
/* Zero every pair that was not decoded */
for (; i < N/2; i++) {
coef[2*i + 0] = 0;
coef[2*i + 1] = 0;
}
/* Signs: one bit per non-zero coefficient, a cleared bit means negative */
for (i = 0; i < len; i++) {
if (coef[i]) {
if (!get_bits1(gb)) /* s */
coef[i] *= -1;
}
}
return 0;
}
/**
 * Parse complex stereo prediction data for a channel pair.
 *
 * Fills us->pred_used, us->pred_dir, us->complex_coef, us->use_prev_frame
 * and the prediction coefficients us->alpha_q_re / us->alpha_q_im.
 * Prediction bands span SFB_PER_PRED_BAND scalefactor bands, so flags and
 * coefficients are read for every other band and copied to the following
 * one. Coefficients are coded as differences, either to the previous band
 * of the same group or (with time-direction delta coding) to an earlier
 * group or the previous frame.
 *
 * @param ac                     decoder context (not used by this function)
 * @param us                     stereo state to fill in
 * @param cpe                    channel pair; provides max_sfb_ste and the
 *                               window sequences of its first channel
 * @param gb                     bit reader to consume from
 * @param num_window_groups      number of window groups in this frame
 * @param prev_num_window_groups number of window groups in the last frame
 * @param indep_flag             non-zero if the frame must not depend on
 *                               previous frames
 * @return always 0
 */
static int decode_usac_stereo_cplx(AACDecContext *ac, AACUsacStereo *us,
                                   ChannelElement *cpe, GetBitContext *gb,
                                   int num_window_groups,
                                   int prev_num_window_groups,
                                   int indep_flag)
{
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    const int stride = cpe->max_sfb_ste;
    int time_delta = 0;

    if (get_bits1(gb)) { /* cplx_pred_all */
        /* Prediction is active in every band */
        for (int g = 0; g < num_window_groups; g++)
            for (int sfb = 0; sfb < stride; sfb++)
                us->pred_used[g*stride + sfb] = 1;
    } else {
        /* One flag per prediction band, shared by both of its bands */
        for (int g = 0; g < num_window_groups; g++) {
            for (int sfb = 0; sfb < stride; sfb += SFB_PER_PRED_BAND) {
                const int idx = g*stride + sfb;
                const uint8_t used = get_bits1(gb);

                us->pred_used[idx] = used;
                if (sfb + 1 < stride)
                    us->pred_used[idx + 1] = used;
            }
        }
    }

    us->pred_dir     = get_bits1(gb);
    us->complex_coef = get_bits1(gb);

    us->use_prev_frame = 0;
    if (us->complex_coef && !indep_flag)
        us->use_prev_frame = get_bits1(gb);

    /* delta_code_time: only transmitted for dependent frames */
    if (!indep_flag)
        time_delta = get_bits1(gb);

    /* TODO: shouldn't be needed */
    for (int g = 0; g < num_window_groups; g++) {
        for (int sfb = 0; sfb < stride; sfb += SFB_PER_PRED_BAND) {
            const int idx = g*stride + sfb;
            float base_re = 0;
            float base_im = 0;

            /* Pick the value the coded difference is relative to */
            if (!time_delta) {
                if (sfb) {
                    base_re = us->alpha_q_re[idx - 1];
                    base_im = us->alpha_q_im[idx - 1];
                }
            } else if (g) {
                /* Transient, after the first group - use the current frame,
                 * previous window, alpha values. */
                base_re = us->alpha_q_re[(g - 1)*stride + sfb];
                base_im = us->alpha_q_im[(g - 1)*stride + sfb];
            } else if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE &&
                       ics->window_sequence[1] == EIGHT_SHORT_SEQUENCE) {
                /* The spec doesn't explicitly mention this, but it doesn't make
                 * any other sense otherwise! */
                const int last_group = prev_num_window_groups - 1;
                base_re = us->prev_alpha_q_re[last_group*stride + sfb];
                base_im = us->prev_alpha_q_im[last_group*stride + sfb];
            } else {
                base_re = us->prev_alpha_q_re[idx];
                base_im = us->prev_alpha_q_im[idx];
            }

            if (!us->pred_used[idx]) {
                us->alpha_q_re[idx] = 0;
                us->alpha_q_im[idx] = 0;
            } else {
                int delta = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
                base_re += delta * 0.1f;
                if (us->complex_coef) {
                    delta = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
                    base_im += delta * 0.1f;
                }
                us->alpha_q_re[idx] = base_re;
                us->alpha_q_im[idx] = base_im;
            }

            /* Second scalefactor band of the prediction band */
            if (sfb + 1 < stride) {
                us->alpha_q_re[idx + 1] = us->alpha_q_re[idx];
                us->alpha_q_im[idx + 1] = us->alpha_q_im[idx];
            }
        }
    }

    return 0;
}
/**
 * Derive the window layout of one channel from its parsed ics fields.
 *
 * Remembers the previous number of window groups, selects the scalefactor
 * band tables matching the window sequence and core frame length, sets up
 * the window grouping, validates max_sfb against the number of bands, and
 * resets all band types to ESC_BT.
 *
 * @param ac   decoder context; supplies the sampling index and logging
 * @param sce  channel to set up
 * @param usac configuration providing the core frame length
 * @return 0 on success; AVERROR(EINVAL) if max_sfb exceeds the number of
 *         bands, in which case max_sfb is reset to 0
 */
static int setup_sce(AACDecContext *ac, SingleChannelElement *sce,
                     AACUSACConfig *usac)
{
    IndividualChannelStream *ics = &sce->ics;
    AACUsacElemData *ue = &sce->ue;
    const int sr_idx = ac->oc[1].m4ac.sampling_index;
    const int is_768 = usac->core_frame_len == 768;

    /* Setup window parameters */
    ics->prev_num_window_groups = FFMAX(ics->num_window_groups, 1);

    if (ics->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        /* A single long window forming a single group */
        if (is_768) {
            ics->swb_offset = ff_swb_offset_768[sr_idx];
            ics->num_swb    = ff_aac_num_swb_768[sr_idx];
        } else {
            ics->swb_offset = ff_swb_offset_1024[sr_idx];
            ics->num_swb    = ff_aac_num_swb_1024[sr_idx];
        }
        ics->tns_max_bands = ff_tns_max_bands_usac_1024[sr_idx];

        ics->group_len[0]      = 1;
        ics->num_window_groups = 1;
        ics->num_windows       = 1;
    } else {
        int groups = 0;

        if (is_768) {
            ics->swb_offset = ff_swb_offset_96[sr_idx];
            ics->num_swb    = ff_aac_num_swb_96[sr_idx];
        } else {
            ics->swb_offset = ff_swb_offset_128[sr_idx];
            ics->num_swb    = ff_aac_num_swb_128[sr_idx];
        }
        ics->tns_max_bands = ff_tns_max_bands_usac_128[sr_idx];

        /* Setup scalefactor grouping. 7 bit mask, most significant bit
         * first: a set bit extends the current group by one window, a
         * cleared bit starts the next group. */
        for (int w = 0; w < 7; w++) {
            ics->group_len[w] = 1;
            if ((ue->scale_factor_grouping >> (6 - w)) & 1)
                ics->group_len[groups] += 1;
            else
                groups++;
        }

        ics->group_len[7]      = 1;
        ics->num_window_groups = groups + 1;
        ics->num_windows       = 8;
    }

    if (ics->max_sfb > ics->num_swb) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Number of scalefactor bands in group (%d) "
               "exceeds limit (%d).\n",
               ics->max_sfb, ics->num_swb);
        ics->max_sfb = 0;
        return AVERROR(EINVAL);
    }

    /* Just some defaults for the band types */
    for (int band = 0; band < FF_ARRAY_ELEMS(sce->band_type); band++)
        sce->band_type[band] = ESC_BT;

    return 0;
}
/**
 * Parse the stereo-specific side information of a channel pair.
 *
 * Nothing is read unless both channels have core_mode 0. Otherwise the
 * function reads the TNS-active and common-window flags; with a common
 * window it reads the shared window information, sets up both channels,
 * and reads the M/S mask or complex prediction data. Finally the TNS
 * signalling for the pair is parsed.
 *
 * The current frame's prediction coefficients are always cleared first; the
 * previous frame's ones are cleared whenever they must not be referenced
 * (no common window, independent frame, or a switch between short and
 * non-short windows).
 *
 * @param ac         decoder context
 * @param usac       stream configuration, passed on to setup_sce()
 * @param ec         configuration of this element
 * @param cpe        channel pair to fill in
 * @param gb         bit reader to consume from
 * @param indep_flag non-zero if the frame must not depend on previous ones
 * @return 0 on success, AVERROR_PATCHWELCOME for time warping, or a
 *         negative error code from a helper
 */
static int decode_usac_stereo_info(AACDecContext *ac, AACUSACConfig *usac,
                                   AACUsacElemConfig *ec, ChannelElement *cpe,
                                   GetBitContext *gb, int indep_flag)
{
    AACUsacStereo *us = &cpe->us;
    SingleChannelElement *left  = &cpe->ch[0];
    SingleChannelElement *right = &cpe->ch[1];
    IndividualChannelStream *ics_l = &left->ics;
    IndividualChannelStream *ics_r = &right->ics;
    AACUsacElemData *ue_l = &left->ue;
    AACUsacElemData *ue_r = &right->ue;
    int err, tns_active, common_tns;

    us->common_window = 0;
    us->common_tw = 0;

    /* Alpha values must always be zeroed out for the current frame,
     * as they are propagated to the next frame and may be used. */
    memset(us->alpha_q_re, 0, sizeof(us->alpha_q_re));
    memset(us->alpha_q_im, 0, sizeof(us->alpha_q_im));

    /* Stereo info is only present when both channels use core mode 0 */
    if (ue_l->core_mode || ue_r->core_mode)
        return 0;

    tns_active = get_bits1(gb);
    us->common_window = get_bits1(gb);

    if (indep_flag || !us->common_window) {
        memset(us->prev_alpha_q_re, 0, sizeof(us->prev_alpha_q_re));
        memset(us->prev_alpha_q_im, 0, sizeof(us->prev_alpha_q_im));
    }

    if (us->common_window) {
        int l_short_now, l_short_before, r_short_now, r_short_before;

        /* ics_info(): shift the history, then read the shared values */
        ics_l->window_sequence[1] = ics_l->window_sequence[0];
        ics_r->window_sequence[1] = ics_r->window_sequence[0];
        ics_r->window_sequence[0] = get_bits(gb, 2);
        ics_l->window_sequence[0] = ics_r->window_sequence[0];

        ics_l->use_kb_window[1] = ics_l->use_kb_window[0];
        ics_r->use_kb_window[1] = ics_r->use_kb_window[0];
        ics_r->use_kb_window[0] = get_bits1(gb);
        ics_l->use_kb_window[0] = ics_r->use_kb_window[0];

        /* If there's a change in the transform sequence, zero out last frame's
         * stereo prediction coefficients */
        l_short_now    = ics_l->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
        l_short_before = ics_l->window_sequence[1] == EIGHT_SHORT_SEQUENCE;
        r_short_now    = ics_r->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
        r_short_before = ics_r->window_sequence[1] == EIGHT_SHORT_SEQUENCE;
        if (l_short_now != l_short_before || r_short_now != r_short_before) {
            memset(us->prev_alpha_q_re, 0, sizeof(us->prev_alpha_q_re));
            memset(us->prev_alpha_q_im, 0, sizeof(us->prev_alpha_q_im));
        }

        if (l_short_now) {
            ics_r->max_sfb = get_bits(gb, 4);
            ics_l->max_sfb = ics_r->max_sfb;
            ue_r->scale_factor_grouping = get_bits(gb, 7);
            ue_l->scale_factor_grouping = ue_r->scale_factor_grouping;
        } else {
            ics_r->max_sfb = get_bits(gb, 6);
            ics_l->max_sfb = ics_r->max_sfb;
        }

        /* common_max_sfb: when cleared the second channel has its own */
        if (!get_bits1(gb)) {
            if (ics_r->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
                ics_r->max_sfb = get_bits(gb, 4);
            else
                ics_r->max_sfb = get_bits(gb, 6);
        }

        err = setup_sce(ac, left, usac);
        if (err < 0)
            return err;

        err = setup_sce(ac, right, usac);
        if (err < 0)
            return err;

        cpe->max_sfb_ste = FFMAX(ics_l->max_sfb, ics_r->max_sfb);

        us->ms_mask_mode = get_bits(gb, 2); /* ms_mask_present */
        memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
        switch (us->ms_mask_mode) {
        case 1:
            /* One flag per band and group */
            for (int g = 0; g < ics_l->num_window_groups; g++)
                for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++)
                    cpe->ms_mask[g*cpe->max_sfb_ste + sfb] = get_bits1(gb);
            break;
        case 2:
            /* M/S in every band */
            memset(cpe->ms_mask, 0xFF, sizeof(cpe->ms_mask));
            break;
        case 3:
            /* Complex prediction, unless MPEG Surround stereo is in use */
            if (!ec->stereo_config_index) {
                err = decode_usac_stereo_cplx(ac, us, cpe, gb,
                                              ics_l->num_window_groups,
                                              ics_l->prev_num_window_groups,
                                              indep_flag);
                if (err < 0)
                    return err;
            }
            break;
        default:
            break;
        }
    }

    if (ec->tw_mdct) {
        us->common_tw = get_bits1(gb);
        avpriv_report_missing_feature(ac->avctx,
                                      "AAC USAC timewarping");
        return AVERROR_PATCHWELCOME;
    }

    us->tns_on_lr = 0;
    ue_l->tns_data_present = ue_r->tns_data_present = 0;
    if (!tns_active)
        return 0;

    common_tns = 0;
    if (us->common_window)
        common_tns = get_bits1(gb);

    us->tns_on_lr = get_bits1(gb);

    if (common_tns) {
        /* One set of TNS data, parsed here and shared by both channels */
        err = ff_aac_decode_tns(ac, &left->tns, gb, ics_l);
        if (err < 0)
            return err;
        memcpy(&right->tns, &left->tns, sizeof(left->tns));
        right->tns.present = 1;
        left->tns.present = 1;
        ue_l->tns_data_present = 0;
        ue_r->tns_data_present = 0;
        return 0;
    }

    if (get_bits1(gb)) {
        /* Both channels carry TNS data */
        ue_l->tns_data_present = 1;
        ue_r->tns_data_present = 1;
    } else {
        /* Exactly one channel does; the bit selects the second one */
        ue_r->tns_data_present = get_bits1(gb);
        ue_l->tns_data_present = !ue_r->tns_data_present;
    }

    return 0;
}
/* 7.2.4 Generation of random signs for spectral noise filling
 *
 * Advances the linear congruential generator held in *seed by one step
 * (seed = seed * 69069 + 5) and derives a sign from bit 16 of the new
 * state: -1 if the bit is set, +1 otherwise. */
static inline float noise_random_sign(unsigned int *seed)
{
    const unsigned int next = (*seed) * 69069 + 5;

    *seed = next;
    return (next & 0x10000) ? -1.f : +1.f;
}
/**
 * Replace zero-valued spectral coefficients with noise.
 *
 * Every coefficient that is exactly zero in a band starting at or above the
 * noise filling start offset is replaced by +/- the noise amplitude, with
 * the sign drawn from the channel's noise seed. Bands in which all
 * coefficients were zero additionally get the noise offset added to their
 * scalefactor offset.
 *
 * @param ac  decoder context; supplies the frame length class
 * @param sce channel whose coefficients and scalefactor offsets are updated
 * @param ue  element data providing the noise level and offset
 */
static void apply_noise_fill(AACDecContext *ac, SingleChannelElement *sce,
                             AACUsacElemData *ue)
{
    IndividualChannelStream *ics = &sce->ics;
    const float amplitude = powf(2, ((float)ue->noise.level - 14.0f)/3.0f);
    const int sf_shift = ue->noise.offset - 16;
    const int first_bin =
        ff_usac_noise_fill_start_offset[ac->oc[1].m4ac.frame_length_short]
                                       [ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE];
    float *group_base = sce->coeffs;

    for (int g = 0; g < ics->num_window_groups; g++) {
        const unsigned windows = ics->group_len[g];

        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
            const int start = ics->swb_offset[sfb];
            const int width = ics->swb_offset[sfb + 1] - start;
            int all_zero = 1;

            /* Bands below the start offset are never filled */
            if (start < first_bin)
                continue;

            /* Windows of a group lie 128 coefficients apart */
            for (unsigned w = 0; w < windows; w++) {
                float *band = group_base + start + w*128;

                for (int z = 0; z < width; z++) {
                    if (band[z] != 0) {
                        all_zero = 0;
                        continue;
                    }
                    band[z] = noise_random_sign(&sce->ue.noise.seed) * amplitude;
                }
            }

            if (all_zero)
                sce->sfo[g*ics->max_sfb + sfb] += sf_shift;
        }

        group_base += windows << 7;
    }
}
/**
 * Apply noise filling and scalefactors to the spectrum of one channel.
 *
 * Noise is synthesised first when the noise level is non-zero, since that
 * step may modify the scalefactor offsets. The offsets are then turned into
 * scalefactors, and every band of every window is multiplied by its
 * scalefactor in place.
 *
 * @param ac  decoder context providing the DSP functions
 * @param sce channel whose coefficients are scaled
 * @param ue  element data providing the noise filling parameters
 */
static void spectrum_scale(AACDecContext *ac, SingleChannelElement *sce,
                           AACUsacElemData *ue)
{
    IndividualChannelStream *ics = &sce->ics;
    float *group_base;

    /* Synthesise noise */
    if (ue->noise.level)
        apply_noise_fill(ac, sce, ue);

    /* Noise filling may apply an offset to the scalefactor offset */
    ac->dsp.dequant_scalefactors(sce);

    /* Apply scalefactors */
    group_base = sce->coeffs;
    for (int g = 0; g < ics->num_window_groups; g++) {
        const unsigned windows = ics->group_len[g];

        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
            const int start  = ics->swb_offset[sfb];
            const int width  = ics->swb_offset[sfb + 1] - start;
            const float gain = sce->sf[g*ics->max_sfb + sfb];

            /* Windows of a group lie 128 coefficients apart */
            for (unsigned w = 0; w < windows; w++) {
                float *band = group_base + start + w*128;
                ac->fdsp->vector_fmul_scalar(band, band, gain, width);
            }
        }

        group_base += windows << 7;
    }
}
/**
 * Compute the real part of the downmix from the pair's two spectra.
 *
 * For every band up to max_sfb_ste, each output value is half the sum of
 * the two channels' coefficients, or half their difference when pred_dir is
 * set. Unlike complex_stereo_downmix_cur() the pred_used flags are not
 * consulted.
 *
 * @param ac      decoder context (not used by this function)
 * @param cpe     channel pair providing both spectra and the band layout
 * @param dmix_re output buffer, laid out like the coefficient arrays
 */
static void complex_stereo_downmix_prev(AACDecContext *ac, ChannelElement *cpe,
                                        float *dmix_re)
{
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    const int sign = cpe->us.pred_dir ? -1 : +1;
    float *first  = cpe->ch[0].coeffs;
    float *second = cpe->ch[1].coeffs;

    for (int g = 0; g < ics->num_window_groups; g++) {
        const unsigned windows = ics->group_len[g];

        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
            const int start = ics->swb_offset[sfb];
            const int width = ics->swb_offset[sfb + 1] - start;

            /* Windows of a group lie 128 coefficients apart */
            for (unsigned w = 0; w < windows; w++) {
                const float *in1 = first   + start + w*128;
                const float *in2 = second  + start + w*128;
                float *out       = dmix_re + start + w*128;

                for (int z = 0; z < width; z++)
                    out[z] = 0.5*(in1[z] + sign*in2[z]);
            }
        }

        first   += windows << 7;
        second  += windows << 7;
        dmix_re += windows << 7;
    }
}
/**
 * Compute the real part of the downmix, honouring the pred_used flags.
 *
 * In bands where prediction is used, each output value is half the sum of
 * the two channels' coefficients (half the difference when pred_dir is
 * set). In all other bands the first channel's coefficients are copied
 * through unchanged.
 *
 * @param ac      decoder context (not used by this function)
 * @param cpe     channel pair providing both spectra, the band layout and
 *                the prediction flags
 * @param dmix_re output buffer, laid out like the coefficient arrays
 */
static void complex_stereo_downmix_cur(AACDecContext *ac, ChannelElement *cpe,
                                       float *dmix_re)
{
    AACUsacStereo *us = &cpe->us;
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    const int sign = cpe->us.pred_dir ? -1 : +1;
    float *first  = cpe->ch[0].coeffs;
    float *second = cpe->ch[1].coeffs;

    for (int g = 0; g < ics->num_window_groups; g++) {
        const unsigned windows = ics->group_len[g];

        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
            const int start = ics->swb_offset[sfb];
            const int width = ics->swb_offset[sfb + 1] - start;
            const int predicted = us->pred_used[g*cpe->max_sfb_ste + sfb];

            /* Windows of a group lie 128 coefficients apart */
            for (unsigned w = 0; w < windows; w++) {
                const float *in1 = first   + start + w*128;
                const float *in2 = second  + start + w*128;
                float *out       = dmix_re + start + w*128;

                if (predicted) {
                    for (int z = 0; z < width; z++)
                        out[z] = 0.5*(in1[z] + sign*in2[z]);
                } else {
                    for (int z = 0; z < width; z++)
                        out[z] = in1[z];
                }
            }
        }

        first   += windows << 7;
        second  += windows << 7;
        dmix_re += windows << 7;
    }
}
/* Evaluate the 7-tap filter on seven explicitly given input samples. The
 * taps are applied in reverse order: f[6] weights x0, f[0] weights x6. */
static inline float usac_mdst_fir7(const float f[7],
                                   float x0, float x1, float x2, float x3,
                                   float x4, float x5, float x6)
{
    return f[6]*x0 + f[5]*x1 + f[4]*x2 +
           f[3]*x3 +
           f[2]*x4 + f[1]*x5 + f[0]*x6;
}

/**
 * Accumulate a 7-tap filtered version of re into im.
 *
 * For every position the filter is evaluated on the three samples before,
 * the sample at, and the three samples after that position; the result is
 * multiplied by factor_even or factor_odd according to the parity of the
 * position and added to im. Positions that would fall outside the array are
 * mirrored back into it at both ends, so only re[0..len-1] is read.
 *
 * @param im          accumulator, len elements, updated in place
 * @param re          input samples, len elements
 * @param f           the seven filter coefficients
 * @param len         number of samples
 * @param factor_even multiplier applied at even positions
 * @param factor_odd  multiplier applied at odd positions
 */
static void complex_stereo_interpolate_imag(float *im, float *re, const float f[7],
                                            int len, int factor_even, int factor_odd)
{
    int pos;

    /* Leading edge: samples before index 0 are mirrored */
    im[0] += usac_mdst_fir7(f, re[2], re[1], re[0],
                            re[0], re[1], re[2], re[3])*factor_even;
    im[1] += usac_mdst_fir7(f, re[1], re[0], re[0],
                            re[1], re[2], re[3], re[4])*factor_odd;
    im[2] += usac_mdst_fir7(f, re[0], re[0], re[1],
                            re[2], re[3], re[4], re[5])*factor_even;

    /* Interior, two positions (odd, then even) per iteration */
    for (pos = 3; pos < len - 4; pos += 2) {
        const float *r = re + pos;

        im[pos + 0] += usac_mdst_fir7(f, r[-3], r[-2], r[-1],
                                      r[0], r[1], r[2], r[3])*factor_odd;
        im[pos + 1] += usac_mdst_fir7(f, r[-2], r[-1], r[0],
                                      r[1], r[2], r[3], r[4])*factor_even;
    }

    /* Trailing edge: samples past the last index are mirrored */
    pos = len - 3;
    im[pos] += usac_mdst_fir7(f, re[pos-3], re[pos-2], re[pos-1],
                              re[pos], re[pos+1], re[pos+2], re[pos+2])*factor_odd;

    pos = len - 2;
    im[pos] += usac_mdst_fir7(f, re[pos-3], re[pos-2], re[pos-1],
                              re[pos], re[pos+1], re[pos+1], re[pos])*factor_even;

    pos = len - 1;
    im[pos] += usac_mdst_fir7(f, re[pos-3], re[pos-2], re[pos-1],
                              re[pos], re[pos], re[pos-1], re[pos-2])*factor_odd;
}
/**
 * Apply complex stereo prediction to the two spectra of a channel pair.
 *
 * In every band where prediction is used, the second channel's coefficients
 * have the predicted part (alpha_re times the first channel plus alpha_im
 * times the imaginary downmix) subtracted, and the pair is then converted
 * in place to its sum and difference. pred_dir selects which way round the
 * difference is taken. Bands without prediction are left untouched.
 *
 * @param ac  decoder context (not used by this function)
 * @param cpe channel pair, modified in place
 */
static void apply_complex_stereo(AACDecContext *ac, ChannelElement *cpe)
{
    AACUsacStereo *us = &cpe->us;
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    float *first   = cpe->ch[0].coeffs;
    float *second  = cpe->ch[1].coeffs;
    float *dmix_im = us->dmix_im;

    for (int g = 0; g < ics->num_window_groups; g++) {
        const unsigned windows = ics->group_len[g];

        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
            const int band  = g*cpe->max_sfb_ste + sfb;
            const int start = ics->swb_offset[sfb];
            const int width = ics->swb_offset[sfb + 1] - start;
            const float alpha_re = us->alpha_q_re[band];
            const float alpha_im = us->alpha_q_im[band];

            if (!us->pred_used[band])
                continue;

            /* Windows of a group lie 128 coefficients apart */
            for (unsigned w = 0; w < windows; w++) {
                float *c1 = first   + start + w*128;
                float *c2 = second  + start + w*128;
                const float *im = dmix_im + start + w*128;

                for (int z = 0; z < width; z++) {
                    /* Residual with the predicted part removed */
                    const float res = c2[z] - alpha_re*c1[z] - alpha_im*im[z];

                    if (!cpe->us.pred_dir) {
                        c2[z] = c1[z] - res;
                        c1[z] = c1[z] + res;
                    } else {
                        c2[z] = res - c1[z];
                        c1[z] = res + c1[z];
                    }
                }
            }
        }

        first   += windows << 7;
        second  += windows << 7;
        dmix_im += windows << 7;
    }
}
/**
 * Select the 7-tap filter used by complex_stereo_interpolate_imag().
 *
 * The choice is driven by the first channel's window sequence and by its
 * window shape flags; use_kb_window[0] is the current frame's flag and
 * use_kb_window[1] the one it held before the current frame was parsed.
 *
 * @param cpe     channel pair to inspect (only ch[0].ics is read)
 * @param is_prev zero to get the filter for the current frame's downmix,
 *                non-zero for the previous frame's downmix
 * @return pointer to a row of ff_aac_usac_mdst_filt_cur
 *
 * NOTE(review): the is_prev path indexes the same "cur" table as the
 * current-frame path, with win/shape restricted to 0..1 - confirm this is
 * intended and that no dedicated previous-frame table should be used.
 */
static const float *complex_stereo_get_filter(ChannelElement *cpe, int is_prev)
{
    int win, shape;

    if (is_prev) {
        win   = cpe->ch[0].ics.window_sequence[0] == LONG_STOP_SEQUENCE;
        shape = cpe->ch[0].ics.use_kb_window[1];
    } else {
        const int kb_cur  = cpe->ch[0].ics.use_kb_window[0];
        const int kb_prev = cpe->ch[0].ics.use_kb_window[1];

        /* Only the start/stop sequences get their own row; everything else
         * (long, eight-short, unknown) shares row 0. */
        if (cpe->ch[0].ics.window_sequence[0] == LONG_START_SEQUENCE)
            win = 1;
        else if (cpe->ch[0].ics.window_sequence[0] == LONG_STOP_SEQUENCE)
            win = 2;
        else
            win = 0;

        /* (cur, prev) -> shape: (0,0)=0, (1,1)=1, (0,1)=2, anything else=3 */
        shape = 3;
        if (kb_cur == kb_prev) {
            if (kb_cur == 0)
                shape = 0;
            else if (kb_cur == 1)
                shape = 1;
        } else if (kb_cur == 0 && kb_prev == 1) {
            shape = 2;
        }
    }

    return ff_aac_usac_mdst_filt_cur[win][shape];
}
/**
 * Turn the parsed spectral data of one element into time-domain output.
 *
 * Steps, in order:
 *  1. spectrum_scale() on every channel;
 *  2. for channel pairs with a common window: TNS (only when tns_on_lr is
 *     not set), followed by complex prediction (ms_mask_mode == 3) or
 *     mid/side stereo (any other non-zero ms_mask_mode);
 *  3. for channel pairs: save the coefficients and alpha values for use by
 *     the next frame;
 *  4. TNS on every channel on which it has not been applied yet, then the
 *     IMDCT and windowing.
 *
 * @param ac          decoder context
 * @param usac        USAC configuration (core_frame_len is read)
 * @param cpe         element to process; ch[0..nb_channels-1] are used
 * @param nb_channels number of core-coded channels in the element
 */
static void spectrum_decode(AACDecContext *ac, AACUSACConfig *usac,
                            ChannelElement *cpe, int nb_channels)
{
    AACUsacStereo *us = &cpe->us;
    /* Set once TNS has run ahead of the stereo tools, so that the final
     * pass neither repeats nor skips it. */
    int tns_applied = 0;

    for (int ch = 0; ch < nb_channels; ch++) {
        SingleChannelElement *sce = &cpe->ch[ch];
        AACUsacElemData *ue = &sce->ue;

        spectrum_scale(ac, sce, ue);
    }

    if (nb_channels > 1 && us->common_window) {
        /* Apply TNS, if the tns_on_lr bit is not set. */
        if (!us->tns_on_lr) {
            for (int ch = 0; ch < nb_channels; ch++) {
                SingleChannelElement *sce = &cpe->ch[ch];

                if (sce->tns.present)
                    ac->dsp.apply_tns(sce->coeffs, &sce->tns, &sce->ics, 1);
            }
            tns_applied = 1;
        }

        if (us->ms_mask_mode == 3) {
            const float *filt;

            complex_stereo_downmix_cur(ac, cpe, us->dmix_re);
            complex_stereo_downmix_prev(ac, cpe, us->prev_dmix_re);

            /* Build dmix_im from the current downmix and, if signalled,
             * from the previous frame's downmix as well. */
            filt = complex_stereo_get_filter(cpe, 0);
            complex_stereo_interpolate_imag(us->dmix_im, us->dmix_re, filt,
                                            usac->core_frame_len, 1, 1);
            if (us->use_prev_frame) {
                filt = complex_stereo_get_filter(cpe, 1);
                complex_stereo_interpolate_imag(us->dmix_im, us->prev_dmix_re, filt,
                                                usac->core_frame_len, -1, 1);
            }

            apply_complex_stereo(ac, cpe);
        } else if (us->ms_mask_mode > 0) {
            ac->dsp.apply_mid_side_stereo(ac, cpe);
        }
    }

    /* Save coefficients and alpha values for prediction reasons */
    if (nb_channels > 1) {
        for (int ch = 0; ch < nb_channels; ch++) {
            SingleChannelElement *sce = &cpe->ch[ch];

            memcpy(sce->prev_coeffs, sce->coeffs, sizeof(sce->coeffs));
        }
        memcpy(us->prev_alpha_q_re, us->alpha_q_re, sizeof(us->alpha_q_re));
        memcpy(us->prev_alpha_q_im, us->alpha_q_im, sizeof(us->alpha_q_im));
    }

    for (int ch = 0; ch < nb_channels; ch++) {
        SingleChannelElement *sce = &cpe->ch[ch];

        /* Apply TNS, if it hasn't been applied yet. Keying this on
         * tns_applied rather than on tns_on_lr also covers channel pairs
         * without a common window: they never enter the stereo branch
         * above, so with tns_on_lr unset their TNS data would otherwise be
         * parsed but never applied. */
        if (sce->tns.present && !tns_applied)
            ac->dsp.apply_tns(sce->coeffs, &sce->tns, &sce->ics, 1);

        if (ac->oc[1].m4ac.frame_length_short)
            ac->dsp.imdct_and_windowing_768(ac, sce);
        else
            ac->dsp.imdct_and_windowing(ac, sce);
    }
}
/**
 * Parse the core coder data of one SCE/LFE/CPE element and decode it.
 *
 * Reads, in bitstream order: one core_mode bit per channel, the stereo info
 * (two core-coded channels only), then per channel either an LPD channel
 * stream or an FD channel stream (TNS flag, global gain, noise filling,
 * ics_info, scale factors, TNS data, arithmetic-coded spectrum, FAC data).
 * SBR data follows when ec->sbr.ratio is non-zero. Finally the spectrum is
 * converted to output samples by spectrum_decode(), and SBR is applied when
 * enabled in the output configuration.
 *
 * @param ac          decoder context
 * @param usac        USAC configuration (core_frame_len is read)
 * @param ec          configuration of the element being decoded
 * @param che         channel element receiving the decoded data
 * @param gb          bit reader positioned at the element's core coder data
 * @param indep_flag  independency flag of the frame; forces an arithmetic
 *                    coder reset
 * @param nb_channels number of channels of the element (1 or 2 at the
 *                    visible call sites)
 * @return 0 on success, otherwise the error reported by the failing parser,
 *         or AVERROR_PATCHWELCOME for time warping and for a non-zero
 *         stereo_config_index together with SBR (Mps212)
 */
static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac,
AACUsacElemConfig *ec, ChannelElement *che,
GetBitContext *gb, int indep_flag, int nb_channels)
{
int ret;
int arith_reset_flag;
AACUsacStereo *us = &che->us;
int core_nb_channels = nb_channels;
/* Local symbols */
uint8_t global_gain;
us->common_window = 0;
/* Reset per-frame TNS state and read one core_mode bit per channel.
 * NOTE(review): this runs over nb_channels, i.e. before the reduction to a
 * single core channel for stereo_config_index == 1 below - confirm that two
 * core_mode bits are really present in that configuration. */
for (int ch = 0; ch < core_nb_channels; ch++) {
SingleChannelElement *sce = &che->ch[ch];
AACUsacElemData *ue = &sce->ue;
sce->tns.present = 0;
ue->tns_data_present = 0;
ue->core_mode = get_bits1(gb);
}
/* With stereo_config_index == 1 only one channel is core coded. */
if (nb_channels > 1 && ec->stereo_config_index == 1)
core_nb_channels = 1;
if (core_nb_channels == 2) {
ret = decode_usac_stereo_info(ac, usac, ec, che, gb, indep_flag);
if (ret)
return ret;
}
for (int ch = 0; ch < core_nb_channels; ch++) {
SingleChannelElement *sce = &che->ch[ch];
IndividualChannelStream *ics = &sce->ics;
AACUsacElemData *ue = &sce->ue;
if (ue->core_mode) { /* lpd_channel_stream */
ret = ff_aac_ldp_parse_channel_stream(ac, usac, ue, gb);
if (ret < 0)
return ret;
continue;
}
/* The TNS flag is read here only for a single core channel or when the
 * two channels use different core modes; otherwise it is left at the 0
 * set above unless decode_usac_stereo_info() changed it. */
if ((core_nb_channels == 1) ||
(che->ch[0].ue.core_mode != che->ch[1].ue.core_mode))
ue->tns_data_present = get_bits1(gb);
/* fd_channel_stream */
global_gain = get_bits(gb, 8);
ue->noise.level = 0;
if (ec->noise_fill) {
ue->noise.level = get_bits(gb, 3);
ue->noise.offset = get_bits(gb, 5);
}
if (!us->common_window) {
/* ics_info() */
/* Index 1 keeps the previous frame's value, index 0 gets the new one. */
ics->window_sequence[1] = ics->window_sequence[0];
ics->window_sequence[0] = get_bits(gb, 2);
ics->use_kb_window[1] = ics->use_kb_window[0];
ics->use_kb_window[0] = get_bits1(gb);
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
ics->max_sfb = get_bits(gb, 4);
ue->scale_factor_grouping = get_bits(gb, 7);
} else {
ics->max_sfb = get_bits(gb, 6);
}
ret = setup_sce(ac, sce, usac);
if (ret < 0)
return ret;
}
if (ec->tw_mdct && !us->common_tw) {
/* tw_data() */
if (get_bits1(gb)) { /* tw_data_present */
/* Time warping is not supported in baseline profile streams. */
avpriv_report_missing_feature(ac->avctx,
"AAC USAC timewarping");
return AVERROR_PATCHWELCOME;
}
}
ret = decode_usac_scale_factors(ac, sce, gb, global_gain);
if (ret < 0)
return ret;
if (ue->tns_data_present) {
sce->tns.present = 1;
ret = ff_aac_decode_tns(ac, &sce->tns, gb, ics);
if (ret < 0)
return ret;
}
/* ac_spectral_data */
/* The reset flag is only transmitted when the frame is not independent. */
arith_reset_flag = indep_flag;
if (!arith_reset_flag)
arith_reset_flag = get_bits1(gb);
/* Decode coeffs */
memset(&sce->coeffs[0], 0, 1024*sizeof(float));
/* One arithmetic-coded spectrum per window, stored 128 coefficients
 * apart; a requested reset is only passed on for the first window. */
for (int win = 0; win < ics->num_windows; win++) {
int lg = ics->swb_offset[ics->max_sfb];
int N;
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
N = usac->core_frame_len / 8;
else
N = usac->core_frame_len;
ret = decode_spectrum_ac(ac, sce->coeffs + win*128, gb, &ue->ac,
arith_reset_flag && (win == 0), lg, N);
if (ret < 0)
return ret;
}
if (get_bits1(gb)) { /* fac_data_present */
const uint16_t len_8 = usac->core_frame_len / 8;
const uint16_t len_16 = usac->core_frame_len / 16;
const uint16_t fac_len = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? len_16 : len_8;
ret = ff_aac_parse_fac_data(ue, gb, 1, fac_len);
if (ret < 0)
return ret;
}
}
if (ec->sbr.ratio) {
/* SBR data covers both channels only for stereo_config_index 0 or 3. */
int sbr_ch = nb_channels;
if (nb_channels == 2 &&
!(ec->stereo_config_index == 0 || ec->stereo_config_index == 3))
sbr_ch = 1;
ret = ff_aac_sbr_decode_usac_data(ac, che, ec, gb, sbr_ch, indep_flag);
if (ret < 0)
return ret;
if (ec->stereo_config_index) {
avpriv_report_missing_feature(ac->avctx, "AAC USAC Mps212");
return AVERROR_PATCHWELCOME;
}
}
spectrum_decode(ac, usac, che, core_nb_channels);
if (ac->oc[1].m4ac.sbr > 0) {
ac->proc.sbr_apply(ac, che, nb_channels == 2 ? TYPE_CPE : TYPE_SCE,
che->ch[0].output,
che->ch[1].output);
}
return 0;
}
/**
 * Parse an AudioPreRoll extension payload.
 *
 * The payload starts with an embedded configuration. It is decoded with a
 * copy of the bit reader; if the resulting MPEG4AudioConfig equals the one
 * in use before the call, the rest of the payload is ignored. Otherwise the
 * contained pre-roll access units are copied out one by one and decoded
 * through ff_aac_usac_decode_frame().
 *
 * @param ac decoder context
 * @param gb bit reader positioned at the start of the payload
 * @return 0 on success, the error from ff_aac_usac_config_decode() if the
 *         embedded configuration is rejected (the previous MPEG4AudioConfig
 *         is restored in that case), or AVERROR(ENOMEM)
 *
 * NOTE(review): a failure while decoding a pre-roll access unit only stops
 * the loop; the function still returns 0. Confirm this best-effort
 * behaviour is intended.
 */
static int parse_audio_preroll(AACDecContext *ac, GetBitContext *gb)
{
    int ret = 0;
    GetBitContext gbc;
    OutputConfiguration *oc = &ac->oc[1];
    MPEG4AudioConfig *m4ac = &oc->m4ac;
    MPEG4AudioConfig m4ac_bak = oc->m4ac;
    uint8_t temp_data[512];
    uint8_t *tmp_buf = temp_data;
    size_t tmp_buf_size = sizeof(temp_data);

    av_unused int crossfade;
    int num_preroll_frames;

    int config_len = get_escaped_value(gb, 4, 4, 8);

    /* Implementations are free to pad the config to any length, so use a
     * different reader for this. */
    gbc = *gb;
    ret = ff_aac_usac_config_decode(ac, ac->avctx, &gbc, oc, m4ac->chan_config);
    if (ret < 0) {
        *m4ac = m4ac_bak;
        return ret;
    }
    ac->oc[1].m4ac.chan_config = 0;

    /* 7.18.3.3 Bitrate adaption
     * If configuration didn't change after applying preroll, continue
     * without decoding it. */
    if (!memcmp(m4ac, &m4ac_bak, sizeof(m4ac_bak)))
        return 0;

    skip_bits_long(gb, config_len*8);

    crossfade = get_bits1(gb); /* applyCrossfade */
    skip_bits1(gb); /* reserved */
    num_preroll_frames = get_escaped_value(gb, 2, 4, 0); /* numPreRollFrames */

    for (int frame = 0; frame < num_preroll_frames; frame++) {
        int got_frame_ptr = 0;
        int au_len = get_escaped_value(gb, 16, 16, 0);
        /* The buffer is sized at 8 bytes per access unit byte, as before.
         * NOTE(review): only au_len bytes are filled; the remainder
         * presumably serves as slack for the bit reader - confirm. */
        const size_t needed = (size_t)au_len * 8;

        if (needed > tmp_buf_size) {
            /* The stack buffer must never be handed to the allocator. */
            uint8_t *heap_buf = tmp_buf == temp_data ? NULL : tmp_buf;
            uint8_t *grown = av_realloc_array(heap_buf, au_len, 8);

            if (!grown) {
                av_free(heap_buf);
                return AVERROR(ENOMEM);
            }
            tmp_buf      = grown;
            /* Remember the new capacity so that later, smaller access units
             * reuse the buffer instead of reallocating it again. */
            tmp_buf_size = needed;
        }

        /* Byte alignment is not guaranteed. */
        for (int pos = 0; pos < au_len; pos++)
            tmp_buf[pos] = get_bits(gb, 8);

        ret = init_get_bits8(&gbc, tmp_buf, au_len);
        if (ret < 0)
            break;

        ret = ff_aac_usac_decode_frame(ac->avctx, ac, &gbc, &got_frame_ptr);
        if (ret < 0)
            break;
    }

    if (tmp_buf != temp_data)
        av_free(tmp_buf);

    return 0;
}
/**
 * Parse one USAC extension element.
 *
 * A payload may be carried whole in this frame or split into fragments over
 * several frames. A whole payload is parsed straight from gb. Fragments are
 * appended to e->ext.pl_data and parsed from that buffer once the final
 * fragment has arrived.
 *
 * @param ac decoder context
 * @param e  configuration and reassembly state of the extension element
 * @param gb bit reader positioned at the extension element
 * @return 0 on success (including absent or empty payloads),
 *         AVERROR(ENOMEM), or the error of the payload parser
 *
 * NOTE(review): the reassembly buffer is allocated without extra padding
 * before being passed to init_get_bits8() - confirm whether input buffer
 * padding is required here.
 */
static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
                         GetBitContext *gb)
{
    uint8_t *tmp;
    uint8_t pl_frag_start = 1;
    uint8_t pl_frag_end = 1;
    uint32_t len;
    int whole_payload;

    if (!get_bits1(gb)) /* usacExtElementPresent */
        return 0;

    if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
        len = e->ext.default_len;
    } else {
        len = get_bits(gb, 8); /* usacExtElementPayloadLength */
        if (len == 255)
            len += get_bits(gb, 16) - 2;
    }

    if (!len)
        return 0;

    if (e->ext.payload_frag) {
        pl_frag_start = get_bits1(gb); /* usacExtElementStart */
        pl_frag_end = get_bits1(gb); /* usacExtElementStop */
    }

    if (pl_frag_start)
        e->ext.pl_data_offset = 0;

    /* If an extension starts and ends this packet, we can directly use it */
    whole_payload = pl_frag_start && pl_frag_end;

    if (!whole_payload) {
        tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
        if (!tmp) {
            /* Drop the partial payload and clear the pointer, so that a
             * later call neither reallocates nor frees a stale buffer. */
            av_freep(&e->ext.pl_data);
            e->ext.pl_data_offset = 0;
            return AVERROR(ENOMEM);
        }
        e->ext.pl_data = tmp;

        /* Readout data to a buffer */
        for (uint32_t i = 0; i < len; i++)
            e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
    }

    e->ext.pl_data_offset += len;

    if (pl_frag_end) {
        int ret = 0;
        int start_bits = get_bits_count(gb);
        const int pl_len = e->ext.pl_data_offset;
        GetBitContext *gb2 = gb;
        GetBitContext gbc;

        if (!whole_payload) {
            ret = init_get_bits8(&gbc, e->ext.pl_data, pl_len);
            if (ret < 0)
                return ret;

            gb2 = &gbc;
        }

        switch (e->ext.type) {
        case ID_EXT_ELE_FILL:
            /* Filler elements have no usable payload */
            break;
        case ID_EXT_ELE_AUDIOPREROLL:
            ret = parse_audio_preroll(ac, gb2);
            break;
        default:
            /* This should never happen */
            av_assert0(0);
        }

        /* The payload is finished: release the reassembly state so that a
         * stray continuation fragment cannot append to a stale offset. */
        av_freep(&e->ext.pl_data);
        e->ext.pl_data_offset = 0;
        if (ret < 0)
            return ret;

        /* Only a payload parsed in place still has unread bytes left in gb.
         * A reassembled payload was already consumed from gb while it was
         * copied out above, so nothing more may be skipped for it. */
        if (whole_payload)
            skip_bits_long(gb, pl_len*8 - (get_bits_count(gb) - start_bits));
    }

    return 0;
}
/**
 * Decode one USAC access unit.
 *
 * Re-applies the output configuration, reads the independency flag and then
 * walks the configured elements in order: SCE/LFE and CPE go through the
 * core coder, extension elements through parse_ext_ele(). Afterwards the
 * frame's sample count, sample rate and key flag are set, and the dual-mono
 * channel selection is applied if requested.
 *
 * @param avctx         codec context; sample_rate and frame_size are updated
 *                      when audio was decoded and the output configuration
 *                      status is non-zero
 * @param ac            decoder context; ac->frame receives the output
 * @param gb            bit reader positioned at the start of the access unit
 * @param got_frame_ptr set to 1 if the frame carries samples, else to 0
 * @return 0 on success, a negative AVERROR code otherwise
 */
int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
                             GetBitContext *gb, int *got_frame_ptr)
{
    int ret, is_dmono = 0;
    int indep_flag, samples = 0;
    int audio_found = 0;
    int elem_id[3 /* SCE, CPE, LFE */] = { 0, 0, 0 };
    AVFrame *frame = ac->frame;

    int ratio_mult, ratio_dec;
    AACUSACConfig *usac = &ac->oc[1].usac;
    int sbr_ratio = usac->core_sbr_frame_len_idx == 2 ? 2 :
                    usac->core_sbr_frame_len_idx == 3 ? 3 :
                    usac->core_sbr_frame_len_idx == 4 ? 1 :
                    0;

    /* NOTE(review): sbr_ratio can only be 0, 1, 2 or 3 here, so the
     * "== 4" branch below is never taken and index 4 ends up with a 1:1
     * ratio. Left unchanged on purpose: the resulting sample count has to
     * agree with the configuration parser and with the size of the
     * allocated output frame, neither of which is visible here. */
    if (sbr_ratio == 2) {
        ratio_mult = 8;
        ratio_dec = 3;
    } else if (sbr_ratio == 3) {
        ratio_mult = 2;
        ratio_dec = 1;
    } else if (sbr_ratio == 4) {
        ratio_mult = 4;
        ratio_dec = 1;
    } else {
        ratio_mult = 1;
        ratio_dec = 1;
    }

    /* Do not decode into channel elements whose setup has failed. */
    ret = ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
                                  ac->oc[1].status, 0);
    if (ret < 0)
        return ret;

    ac->avctx->profile = AV_PROFILE_AAC_USAC;

    indep_flag = get_bits1(gb);

    for (int i = 0; i < ac->oc[1].usac.nb_elems; i++) {
        /* Initialized so that the checks below stay well defined for element
         * types that do not map to a channel element. */
        int layout_id = 0;
        int layout_type = 0;
        AACUsacElemConfig *e = &ac->oc[1].usac.elems[i];
        ChannelElement *che = NULL;

        /* Each element type has its own running instance counter. */
        if (e->type == ID_USAC_SCE) {
            layout_id = elem_id[0]++;
            layout_type = TYPE_SCE;
            che = ff_aac_get_che(ac, TYPE_SCE, layout_id);
        } else if (e->type == ID_USAC_CPE) {
            layout_id = elem_id[1]++;
            layout_type = TYPE_CPE;
            che = ff_aac_get_che(ac, TYPE_CPE, layout_id);
        } else if (e->type == ID_USAC_LFE) {
            layout_id = elem_id[2]++;
            layout_type = TYPE_LFE;
            che = ff_aac_get_che(ac, TYPE_LFE, layout_id);
        }

        if (e->type != ID_USAC_EXT && !che) {
            av_log(ac->avctx, AV_LOG_ERROR,
                   "channel element %d.%d is not allocated\n",
                   layout_type, layout_id);
            return AVERROR_INVALIDDATA;
        }

        switch (e->type) {
        case ID_USAC_LFE:
            /* Fallthrough */
        case ID_USAC_SCE:
            ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
                                         indep_flag, 1);
            if (ret < 0)
                return ret;

            audio_found = 1;
            che->present = 1;
            break;
        case ID_USAC_CPE:
            ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
                                         indep_flag, 2);
            if (ret < 0)
                return ret;

            audio_found = 1;
            che->present = 1;
            break;
        case ID_USAC_EXT:
            ret = parse_ext_ele(ac, e, gb);
            if (ret < 0)
                return ret;
            break;
        }
    }

    /* Core frame length, scaled by the SBR ratio selected above. */
    if (audio_found)
        samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024;

    samples = (samples * ratio_mult) / ratio_dec;

    if (ac->oc[1].status && audio_found) {
        avctx->sample_rate = ac->oc[1].m4ac.ext_sample_rate;
        avctx->frame_size = samples;
        ac->oc[1].status = OC_LOCKED;
    }

    if (!frame->data[0] && samples) {
        av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
        return AVERROR_INVALIDDATA;
    }

    if (samples) {
        frame->nb_samples = samples;
        frame->sample_rate = avctx->sample_rate;
        frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0;
        *got_frame_ptr = 1;
    } else {
        av_frame_unref(ac->frame);
        frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0;
        *got_frame_ptr = 0;
    }

    /* for dual-mono audio (SCE + SCE) */
    is_dmono = ac->dmono_mode && elem_id[0] == 2 &&
               !av_channel_layout_compare(&ac->oc[1].ch_layout,
                                          &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);

    if (is_dmono) {
        /* Mode 1 duplicates the first channel, mode 2 the second one. */
        if (ac->dmono_mode == 1)
            frame->data[1] = frame->data[0];
        else if (ac->dmono_mode == 2)
            frame->data[0] = frame->data[1];
    }

    return 0;
}