/* * Atrac 3 compatible decoder * Copyright (c) 2006-2008 Maxim Poliakovski * Copyright (c) 2006-2008 Benjamin Larsson * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * @file * Atrac 3 compatible decoder. * This decoder handles Sony's ATRAC3 data. * * Container formats used to store atrac 3 data: * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3). * * To use this decoder, a calling application must supply the extradata * bytes provided in the containers above. */ #include #include #include #include "avcodec.h" #include "get_bits.h" #include "dsputil.h" #include "bytestream.h" #include "fft.h" #include "atrac.h" #include "atrac3data.h" #define JOINT_STEREO 0x12 #define STEREO 0x2 /* These structures are needed to store the parsed gain control data. */ typedef struct { int num_gain_data; int levcode[8]; int loccode[8]; } gain_info; typedef struct { gain_info gBlock[4]; } gain_block; typedef struct { int pos; int numCoefs; float coef[8]; } tonal_component; typedef struct { int bandsCoded; int numComponents; tonal_component components[64]; float prevFrame[1024]; int gcBlkSwitch; gain_block gainBlock[2]; DECLARE_ALIGNED(32, float, spectrum)[1024]; DECLARE_ALIGNED(32, float, IMDCT_buf)[1024]; float delayBuf1[46]; ///mdct_ctx.imdct_calc(&q->mdct_ctx,pOutput,pInput); /* Perform windowing on the output. */ dsp.vector_fmul(pOutput, pOutput, mdct_window, 512); } /** * Atrac 3 indata descrambling, only used for data coming from the rm container * * @param inbuffer pointer to 8 bit array of indata * @param out pointer to 8 bit array of outdata * @param bytes amount of bytes */ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){ int i, off; uint32_t c; const uint32_t* buf; uint32_t* obuf = (uint32_t*) out; off = (intptr_t)inbuffer & 3; buf = (const uint32_t *)(inbuffer - off); if (off) c = av_be2ne32((0x537F6103U >> (off * 8)) | (0x537F6103U << (32 - (off * 8)))); else c = av_be2ne32(0x537F6103U); bytes += 3 + off; for (i = 0; i < bytes/4; i++) obuf[i] = c ^ buf[i]; if (off) av_log_ask_for_sample(NULL, "Offset of %d not handled.\n", off); return off; } static av_cold void init_atrac3_transforms(ATRAC3Context *q) { float enc_window[256]; int i; /* Generate the mdct window, for details see * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ for (i=0 ; i<256; i++) enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5; if (!mdct_window[0]) for (i=0 ; i<256; i++) { mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]); mdct_window[511-i] = mdct_window[i]; } /* Initialize the MDCT transform. */ ff_mdct_init(&q->mdct_ctx, 9, 1, 1.0); } /** * Atrac3 uninit, free all allocated memory */ static av_cold int atrac3_decode_close(AVCodecContext *avctx) { ATRAC3Context *q = avctx->priv_data; av_free(q->pUnits); av_free(q->decoded_bytes_buffer); ff_mdct_end(&q->mdct_ctx); return 0; } /** / * Mantissa decoding * * @param gb the GetBit context * @param selector what table is the output values coded with * @param codingFlag constant length coding or variable length coding * @param mantissas mantissa output table * @param numCodes amount of values to get */ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int codingFlag, int* mantissas, int numCodes) { int numBits, cnt, code, huffSymb; if (selector == 1) numCodes /= 2; if (codingFlag != 0) { /* constant length coding (CLC) */ numBits = CLCLengthTab[selector]; if (selector > 1) { for (cnt = 0; cnt < numCodes; cnt++) { if (numBits) code = get_sbits(gb, numBits); else code = 0; mantissas[cnt] = code; } } else { for (cnt = 0; cnt < numCodes; cnt++) { if (numBits) code = get_bits(gb, numBits); //numBits is always 4 in this case else code = 0; mantissas[cnt*2] = seTab_0[code >> 2]; mantissas[cnt*2+1] = seTab_0[code & 3]; } } } else { /* variable length coding (VLC) */ if (selector != 1) { for (cnt = 0; cnt < numCodes; cnt++) { huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3); huffSymb += 1; code = huffSymb >> 1; if (huffSymb & 1) code = -code; mantissas[cnt] = code; } } else { for (cnt = 0; cnt < numCodes; cnt++) { huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3); mantissas[cnt*2] = decTable1[huffSymb*2]; mantissas[cnt*2+1] = decTable1[huffSymb*2+1]; } } } } /** * Restore the quantized band spectrum coefficients * * @param gb the GetBit context * @param pOut decoded band spectrum * @return outSubbands subband counter, fix for broken specification/files */ static int decodeSpectrum (GetBitContext *gb, float *pOut) { int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn; int subband_vlc_index[32], SF_idxs[32]; int mantissas[128]; float SF; numSubbands = get_bits(gb, 5); // number of coded subbands codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC /* Get the VLC selector table for the subbands, 0 means not coded. */ for (cnt = 0; cnt <= numSubbands; cnt++) subband_vlc_index[cnt] = get_bits(gb, 3); /* Read the scale factor indexes from the stream. */ for (cnt = 0; cnt <= numSubbands; cnt++) { if (subband_vlc_index[cnt] != 0) SF_idxs[cnt] = get_bits(gb, 6); } for (cnt = 0; cnt <= numSubbands; cnt++) { first = subbandTab[cnt]; last = subbandTab[cnt+1]; subbWidth = last - first; if (subband_vlc_index[cnt] != 0) { /* Decode spectral coefficients for this subband. */ /* TODO: This can be done faster is several blocks share the * same VLC selector (subband_vlc_index) */ readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth); /* Decode the scale factor for this subband. */ SF = ff_atrac_sf_table[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]]; /* Inverse quantize the coefficients. */ for (pIn=mantissas ; first> 2] == 0) continue; coded_components = get_bits(gb,3); for (k=0; k= 64) return AVERROR_INVALIDDATA; pComponent[component_count].pos = j * 64 + (get_bits(gb,6)); max_coded_values = 1024 - pComponent[component_count].pos; coded_values = coded_values_per_component + 1; coded_values = FFMIN(max_coded_values,coded_values); scalefactor = ff_atrac_sf_table[sfIndx] * iMaxQuant[quant_step_index]; readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values); pComponent[component_count].numCoefs = coded_values; /* inverse quant */ pCoef = pComponent[component_count].coef; for (cnt = 0; cnt < coded_values; cnt++) pCoef[cnt] = mantissa[cnt] * scalefactor; component_count++; } } } return component_count; } /** * Decode gain parameters for the coded bands * * @param gb the GetBit context * @param pGb the gainblock for the current band * @param numBands amount of coded bands */ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands) { int i, cf, numData; int *pLevel, *pLoc; gain_info *pGain = pGb->gBlock; for (i=0 ; i<=numBands; i++) { numData = get_bits(gb,3); pGain[i].num_gain_data = numData; pLevel = pGain[i].levcode; pLoc = pGain[i].loccode; for (cf = 0; cf < numData; cf++){ pLevel[cf]= get_bits(gb,4); pLoc [cf]= get_bits(gb,5); if(cf && pLoc[cf] <= pLoc[cf-1]) return -1; } } /* Clear the unused blocks. */ for (; i<4 ; i++) pGain[i].num_gain_data = 0; return 0; } /** * Apply gain parameters and perform the MDCT overlapping part * * @param pIn input float buffer * @param pPrev previous float buffer to perform overlap against * @param pOut output float buffer * @param pGain1 current band gain info * @param pGain2 next band gain info */ static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2) { /* gain compensation function */ float gain1, gain2, gain_inc; int cnt, numdata, nsample, startLoc, endLoc; if (pGain2->num_gain_data == 0) gain1 = 1.0; else gain1 = gain_tab1[pGain2->levcode[0]]; if (pGain1->num_gain_data == 0) { for (cnt = 0; cnt < 256; cnt++) pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt]; } else { numdata = pGain1->num_gain_data; pGain1->loccode[numdata] = 32; pGain1->levcode[numdata] = 4; nsample = 0; // current sample = 0 for (cnt = 0; cnt < numdata; cnt++) { startLoc = pGain1->loccode[cnt] * 8; endLoc = startLoc + 8; gain2 = gain_tab1[pGain1->levcode[cnt]]; gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15]; /* interpolate */ for (; nsample < startLoc; nsample++) pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; /* interpolation is done over eight samples */ for (; nsample < endLoc; nsample++) { pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; gain2 *= gain_inc; } } for (; nsample < 256; nsample++) pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample]; } /* Delay for the overlapping part. */ memcpy(pPrev, &pIn[256], 256*sizeof(float)); } /** * Combine the tonal band spectrum and regular band spectrum * Return position of the last tonal coefficient * * @param pSpectrum output spectrum buffer * @param numComponents amount of tonal components * @param pComponent tonal components for this band */ static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent) { int cnt, i, lastPos = -1; float *pIn, *pOut; for (cnt = 0; cnt < numComponents; cnt++){ lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos); pIn = pComponent[cnt].coef; pOut = &(pSpectrum[pComponent[cnt].pos]); for (i=0 ; ibandsCoded = get_bits(gb,2); result = decodeGainControl (gb, &(pSnd->gainBlock[pSnd->gcBlkSwitch]), pSnd->bandsCoded); if (result) return result; pSnd->numComponents = decodeTonalComponents (gb, pSnd->components, pSnd->bandsCoded); if (pSnd->numComponents == -1) return -1; numSubbands = decodeSpectrum (gb, pSnd->spectrum); /* Merge the decoded spectrum and tonal components. */ lastTonal = addTonalComponents (pSnd->spectrum, pSnd->numComponents, pSnd->components); /* calculate number of used MLT/QMF bands according to the amount of coded spectral lines */ numBands = (subbandTab[numSubbands] - 1) >> 8; if (lastTonal >= 0) numBands = FFMAX((lastTonal + 256) >> 8, numBands); /* Reconstruct time domain samples. */ for (band=0; band<4; band++) { /* Perform the IMDCT step without overlapping. */ if (band <= numBands) { IMLT(q, &(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1); } else memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float)); /* gain compensation and overlapping */ gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), &((pSnd->gainBlock[1 - (pSnd->gcBlkSwitch)]).gBlock[band]), &((pSnd->gainBlock[pSnd->gcBlkSwitch]).gBlock[band])); } /* Swap the gain control buffers for the next frame. */ pSnd->gcBlkSwitch ^= 1; return 0; } /** * Frame handling * * @param q Atrac3 private context * @param databuf the input data */ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) { int result, i; float *p1, *p2, *p3, *p4; uint8_t *ptr1; if (q->codingMode == JOINT_STEREO) { /* channel coupling mode */ /* decode Sound Unit 1 */ init_get_bits(&q->gb,databuf,q->bits_per_frame); result = decodeChannelSoundUnit(q,&q->gb, q->pUnits, q->outSamples, 0, JOINT_STEREO); if (result != 0) return (result); /* Framedata of the su2 in the joint-stereo mode is encoded in * reverse byte order so we need to swap it first. */ if (databuf == q->decoded_bytes_buffer) { uint8_t *ptr2 = q->decoded_bytes_buffer+q->bytes_per_frame-1; ptr1 = q->decoded_bytes_buffer; for (i = 0; i < (q->bytes_per_frame/2); i++, ptr1++, ptr2--) { FFSWAP(uint8_t,*ptr1,*ptr2); } } else { const uint8_t *ptr2 = databuf+q->bytes_per_frame-1; for (i = 0; i < q->bytes_per_frame; i++) q->decoded_bytes_buffer[i] = *ptr2--; } /* Skip the sync codes (0xF8). */ ptr1 = q->decoded_bytes_buffer; for (i = 4; *ptr1 == 0xF8; i++, ptr1++) { if (i >= q->bytes_per_frame) return -1; } /* set the bitstream reader at the start of the second Sound Unit*/ init_get_bits(&q->gb,ptr1,q->bits_per_frame); /* Fill the Weighting coeffs delay buffer */ memmove(q->weighting_delay,&(q->weighting_delay[2]),4*sizeof(int)); q->weighting_delay[4] = get_bits1(&q->gb); q->weighting_delay[5] = get_bits(&q->gb,3); for (i = 0; i < 4; i++) { q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i]; q->matrix_coeff_index_now[i] = q->matrix_coeff_index_next[i]; q->matrix_coeff_index_next[i] = get_bits(&q->gb,2); } /* Decode Sound Unit 2. */ result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[1], &q->outSamples[1024], 1, JOINT_STEREO); if (result != 0) return (result); /* Reconstruct the channel coefficients. */ reverseMatrixing(q->outSamples, &q->outSamples[1024], q->matrix_coeff_index_prev, q->matrix_coeff_index_now); channelWeighting(q->outSamples, &q->outSamples[1024], q->weighting_delay); } else { /* normal stereo mode or mono */ /* Decode the channel sound units. */ for (i=0 ; ichannels ; i++) { /* Set the bitstream reader at the start of a channel sound unit. */ init_get_bits(&q->gb, databuf+((i*q->bytes_per_frame)/q->channels), (q->bits_per_frame)/q->channels); result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[i], &q->outSamples[i*1024], i, q->codingMode); if (result != 0) return (result); } } /* Apply the iQMF synthesis filter. */ p1= q->outSamples; for (i=0 ; ichannels ; i++) { p2= p1+256; p3= p2+256; p4= p3+256; atrac_iqmf (p1, p2, 256, p1, q->pUnits[i].delayBuf1, q->tempBuf); atrac_iqmf (p4, p3, 256, p3, q->pUnits[i].delayBuf2, q->tempBuf); atrac_iqmf (p1, p3, 512, p1, q->pUnits[i].delayBuf3, q->tempBuf); p1 +=1024; } return 0; } /** * Atrac frame decoding * * @param avctx pointer to the AVCodecContext */ static int atrac3_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; ATRAC3Context *q = avctx->priv_data; int result = 0, i; const uint8_t* databuf; int16_t* samples = data; if (buf_size < avctx->block_align) { av_log(avctx, AV_LOG_ERROR, "Frame too small (%d bytes). Truncated file?\n", buf_size); *data_size = 0; return buf_size; } /* Check if we need to descramble and what buffer to pass on. */ if (q->scrambled_stream) { decode_bytes(buf, q->decoded_bytes_buffer, avctx->block_align); databuf = q->decoded_bytes_buffer; } else { databuf = buf; } result = decodeFrame(q, databuf); if (result != 0) { av_log(NULL,AV_LOG_ERROR,"Frame decoding error!\n"); return -1; } if (q->channels == 1) { /* mono */ for (i = 0; i<1024; i++) samples[i] = av_clip_int16(round(q->outSamples[i])); *data_size = 1024 * sizeof(int16_t); } else { /* stereo */ for (i = 0; i < 1024; i++) { samples[i*2] = av_clip_int16(round(q->outSamples[i])); samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i])); } *data_size = 2048 * sizeof(int16_t); } return avctx->block_align; } /** * Atrac3 initialization * * @param avctx pointer to the AVCodecContext */ static av_cold int atrac3_decode_init(AVCodecContext *avctx) { int i; const uint8_t *edata_ptr = avctx->extradata; ATRAC3Context *q = avctx->priv_data; static VLC_TYPE atrac3_vlc_table[4096][2]; static int vlcs_initialized = 0; /* Take data from the AVCodecContext (RM container). */ q->sample_rate = avctx->sample_rate; q->channels = avctx->channels; q->bit_rate = avctx->bit_rate; q->bits_per_frame = avctx->block_align * 8; q->bytes_per_frame = avctx->block_align; /* Take care of the codec-specific extradata. */ if (avctx->extradata_size == 14) { /* Parse the extradata, WAV format */ av_log(avctx,AV_LOG_DEBUG,"[0-1] %d\n",bytestream_get_le16(&edata_ptr)); //Unknown value always 1 q->samples_per_channel = bytestream_get_le32(&edata_ptr); q->codingMode = bytestream_get_le16(&edata_ptr); av_log(avctx,AV_LOG_DEBUG,"[8-9] %d\n",bytestream_get_le16(&edata_ptr)); //Dupe of coding mode q->frame_factor = bytestream_get_le16(&edata_ptr); //Unknown always 1 av_log(avctx,AV_LOG_DEBUG,"[12-13] %d\n",bytestream_get_le16(&edata_ptr)); //Unknown always 0 /* setup */ q->samples_per_frame = 1024 * q->channels; q->atrac3version = 4; q->delay = 0x88E; if (q->codingMode) q->codingMode = JOINT_STEREO; else q->codingMode = STEREO; q->scrambled_stream = 0; if ((q->bytes_per_frame == 96*q->channels*q->frame_factor) || (q->bytes_per_frame == 152*q->channels*q->frame_factor) || (q->bytes_per_frame == 192*q->channels*q->frame_factor)) { } else { av_log(avctx,AV_LOG_ERROR,"Unknown frame/channel/frame_factor configuration %d/%d/%d\n", q->bytes_per_frame, q->channels, q->frame_factor); return -1; } } else if (avctx->extradata_size == 10) { /* Parse the extradata, RM format. */ q->atrac3version = bytestream_get_be32(&edata_ptr); q->samples_per_frame = bytestream_get_be16(&edata_ptr); q->delay = bytestream_get_be16(&edata_ptr); q->codingMode = bytestream_get_be16(&edata_ptr); q->samples_per_channel = q->samples_per_frame / q->channels; q->scrambled_stream = 1; } else { av_log(NULL,AV_LOG_ERROR,"Unknown extradata size %d.\n",avctx->extradata_size); } /* Check the extradata. */ if (q->atrac3version != 4) { av_log(avctx,AV_LOG_ERROR,"Version %d != 4.\n",q->atrac3version); return -1; } if (q->samples_per_frame != 1024 && q->samples_per_frame != 2048) { av_log(avctx,AV_LOG_ERROR,"Unknown amount of samples per frame %d.\n",q->samples_per_frame); return -1; } if (q->delay != 0x88E) { av_log(avctx,AV_LOG_ERROR,"Unknown amount of delay %x != 0x88E.\n",q->delay); return -1; } if (q->codingMode == STEREO) { av_log(avctx,AV_LOG_DEBUG,"Normal stereo detected.\n"); } else if (q->codingMode == JOINT_STEREO) { av_log(avctx,AV_LOG_DEBUG,"Joint stereo detected.\n"); } else { av_log(avctx,AV_LOG_ERROR,"Unknown channel coding mode %x!\n",q->codingMode); return -1; } if (avctx->channels <= 0 || avctx->channels > 2 /*|| ((avctx->channels * 1024) != q->samples_per_frame)*/) { av_log(avctx,AV_LOG_ERROR,"Channel configuration error!\n"); return -1; } if(avctx->block_align >= UINT_MAX/2) return -1; /* Pad the data buffer with FF_INPUT_BUFFER_PADDING_SIZE, * this is for the bitstream reader. */ if ((q->decoded_bytes_buffer = av_mallocz((avctx->block_align+(4-avctx->block_align%4) + FF_INPUT_BUFFER_PADDING_SIZE))) == NULL) return AVERROR(ENOMEM); /* Initialize the VLC tables. */ if (!vlcs_initialized) { for (i=0 ; i<7 ; i++) { spectral_coeff_tab[i].table = &atrac3_vlc_table[atrac3_vlc_offs[i]]; spectral_coeff_tab[i].table_allocated = atrac3_vlc_offs[i + 1] - atrac3_vlc_offs[i]; init_vlc (&spectral_coeff_tab[i], 9, huff_tab_sizes[i], huff_bits[i], 1, 1, huff_codes[i], 1, 1, INIT_VLC_USE_NEW_STATIC); } vlcs_initialized = 1; } init_atrac3_transforms(q); atrac_generate_tables(); /* Generate gain tables. */ for (i=0 ; i<16 ; i++) gain_tab1[i] = powf (2.0, (4 - i)); for (i=-15 ; i<16 ; i++) gain_tab2[i+15] = powf (2.0, i * -0.125); /* init the joint-stereo decoding data */ q->weighting_delay[0] = 0; q->weighting_delay[1] = 7; q->weighting_delay[2] = 0; q->weighting_delay[3] = 7; q->weighting_delay[4] = 0; q->weighting_delay[5] = 7; for (i=0; i<4; i++) { q->matrix_coeff_index_prev[i] = 3; q->matrix_coeff_index_now[i] = 3; q->matrix_coeff_index_next[i] = 3; } dsputil_init(&dsp, avctx); q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); if (!q->pUnits) { av_free(q->decoded_bytes_buffer); return AVERROR(ENOMEM); } avctx->sample_fmt = AV_SAMPLE_FMT_S16; return 0; } AVCodec ff_atrac3_decoder = { .name = "atrac3", .type = AVMEDIA_TYPE_AUDIO, .id = CODEC_ID_ATRAC3, .priv_data_size = sizeof(ATRAC3Context), .init = atrac3_decode_init, .close = atrac3_decode_close, .decode = atrac3_decode_frame, .long_name = NULL_IF_CONFIG_SMALL("Atrac 3 (Adaptive TRansform Acoustic Coding 3)"), };