From f33aa12011b45814f5cdc59a279111ae3fa5d53a Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 26 Jun 2006 06:00:07 +0000 Subject: [PATCH] stereo decorrelation support by (Justin Ruggles jruggle earthlink net>) Originally committed as revision 5528 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/flacenc.c | 132 +++++++++++++++++++++++++++------- tests/ffmpeg.regression.ref | 4 +- tests/rotozoom.regression.ref | 4 +- 3 files changed, 112 insertions(+), 28 deletions(-) diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c index 29d3ce1fd4..186aad22a4 100644 --- a/libavcodec/flacenc.c +++ b/libavcodec/flacenc.c @@ -177,7 +177,12 @@ static int flac_encode_init(AVCodecContext *avctx) s->blocksize = select_blocksize(s->samplerate); avctx->frame_size = s->blocksize; - s->max_framesize = 14 + (s->blocksize * s->channels * 2); + /* set maximum encoded frame size in verbatim mode */ + if(s->channels == 2) { + s->max_framesize = 14 + ((s->blocksize * 33 + 7) >> 3); + } else { + s->max_framesize = 14 + (s->blocksize * s->channels * 2); + } streaminfo = av_malloc(FLAC_STREAMINFO_SIZE); write_streaminfo(s, streaminfo); @@ -192,7 +197,7 @@ static int flac_encode_init(AVCodecContext *avctx) return 0; } -static int init_frame(FlacEncodeContext *s) +static void init_frame(FlacEncodeContext *s) { int i, ch; FlacFrame *frame; @@ -221,13 +226,6 @@ static int init_frame(FlacEncodeContext *s) for(ch=0; chchannels; ch++) { frame->subframes[ch].obits = 16; } - if(s->channels == 2) { - frame->ch_mode = FLAC_CHMODE_LEFT_RIGHT; - } else { - frame->ch_mode = FLAC_CHMODE_NOT_STEREO; - } - - return 0; } /** @@ -246,6 +244,94 @@ static void copy_samples(FlacEncodeContext *s, int16_t *samples) } } +static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n) +{ + int i, best; + int32_t lt, rt; + uint64_t left, right, mid, side; + uint64_t score[4]; + + /* calculate sum of squares for each channel */ + left = right = mid = side = 0; + for(i=2; i> 1); + side += ABS(lt - rt); + left += ABS(lt); + right += ABS(rt); + } + + /* calculate score for each mode */ + score[0] = left + right; + score[1] = left + side; + score[2] = right + side; + score[3] = mid + side; + + /* return mode with lowest score */ + best = 0; + for(i=1; i<4; i++) { + if(score[i] < score[best]) { + best = i; + } + } + if(best == 0) { + return FLAC_CHMODE_LEFT_RIGHT; + } else if(best == 1) { + return FLAC_CHMODE_LEFT_SIDE; + } else if(best == 2) { + return FLAC_CHMODE_RIGHT_SIDE; + } else { + return FLAC_CHMODE_MID_SIDE; + } +} + +/** + * Perform stereo channel decorrelation + */ +static void channel_decorrelation(FlacEncodeContext *ctx) +{ + FlacFrame *frame; + int32_t *left, *right; + int i, n; + + frame = &ctx->frame; + n = frame->blocksize; + left = frame->subframes[0].samples; + right = frame->subframes[1].samples; + + if(ctx->channels != 2) { + frame->ch_mode = FLAC_CHMODE_NOT_STEREO; + return; + } + + frame->ch_mode = estimate_stereo_mode(left, right, n); + + /* perform decorrelation and adjust bits-per-sample */ + if(frame->ch_mode == FLAC_CHMODE_LEFT_RIGHT) { + return; + } + if(frame->ch_mode == FLAC_CHMODE_MID_SIDE) { + int32_t tmp; + for(i=0; i> 1; + right[i] = tmp - right[i]; + } + frame->subframes[1].obits++; + } else if(frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) { + for(i=0; isubframes[1].obits++; + } else { + for(i=0; isubframes[0].obits++; + } +} + static void encode_residual_verbatim(FlacEncodeContext *s, int ch) { FlacFrame *frame; @@ -359,19 +445,15 @@ output_frame_header(FlacEncodeContext *s) put_bits(&s->pb, 3, 4); /* bits-per-sample code */ put_bits(&s->pb, 1, 0); write_utf8(&s->pb, s->frame_count); - if(frame->bs_code[1] > 0) { - if(frame->bs_code[1] < 256) { - put_bits(&s->pb, 8, frame->bs_code[1]); - } else { - put_bits(&s->pb, 16, frame->bs_code[1]); - } + if(frame->bs_code[0] == 6) { + put_bits(&s->pb, 8, frame->bs_code[1]); + } else if(frame->bs_code[0] == 7) { + put_bits(&s->pb, 16, frame->bs_code[1]); } - if(s->sr_code[1] > 0) { - if(s->sr_code[1] < 256) { - put_bits(&s->pb, 8, s->sr_code[1]); - } else { - put_bits(&s->pb, 16, s->sr_code[1]); - } + if(s->sr_code[0] == 12) { + put_bits(&s->pb, 8, s->sr_code[1]); + } else if(s->sr_code[0] > 12) { + put_bits(&s->pb, 16, s->sr_code[1]); } flush_put_bits(&s->pb); crc = av_crc(av_crc07, 0, s->pb.buf, put_bits_count(&s->pb)>>3); @@ -493,12 +575,12 @@ static int flac_encode_frame(AVCodecContext *avctx, uint8_t *frame, s = avctx->priv_data; s->blocksize = avctx->frame_size; - if(init_frame(s)) { - return 0; - } + init_frame(s); copy_samples(s, samples); + channel_decorrelation(s); + for(ch=0; chchannels; ch++) { encode_residual(s, ch); } @@ -532,6 +614,8 @@ static int flac_encode_frame(AVCodecContext *avctx, uint8_t *frame, static int flac_encode_close(AVCodecContext *avctx) { + av_freep(&avctx->extradata); + avctx->extradata_size = 0; av_freep(&avctx->coded_frame); return 0; } diff --git a/tests/ffmpeg.regression.ref b/tests/ffmpeg.regression.ref index 7b77f226d2..c8864a3346 100644 --- a/tests/ffmpeg.regression.ref +++ b/tests/ffmpeg.regression.ref @@ -176,7 +176,7 @@ stddev:1050.18 PSNR:35.89 bytes:1054720 264236 ./data/a-adpcm_yam.wav e92cec8c07913ffb91ad2b11f79cdc00 *./data/out.wav stddev:18312.68 PSNR:11.06 bytes:1056768 -7bcc7daf968fc489c9b4b7aca5ae380d *./data/a-flac.flac -799336 ./data/a-flac.flac +9ed4957501a56ce9d4e6a6611553a45f *./data/a-flac.flac +801577 ./data/a-flac.flac 0116cdcefd0aeae3ab8e5140c19c725d *./data/out.wav stddev: 51.59 PSNR:62.07 bytes:1032192 diff --git a/tests/rotozoom.regression.ref b/tests/rotozoom.regression.ref index 687420274d..f3b796a565 100644 --- a/tests/rotozoom.regression.ref +++ b/tests/rotozoom.regression.ref @@ -176,7 +176,7 @@ stddev:1050.18 PSNR:35.89 bytes:1054720 264236 ./data/a-adpcm_yam.wav e92cec8c07913ffb91ad2b11f79cdc00 *./data/out.wav stddev:18312.68 PSNR:11.06 bytes:1056768 -7bcc7daf968fc489c9b4b7aca5ae380d *./data/a-flac.flac -799336 ./data/a-flac.flac +9ed4957501a56ce9d4e6a6611553a45f *./data/a-flac.flac +801577 ./data/a-flac.flac 0116cdcefd0aeae3ab8e5140c19c725d *./data/out.wav stddev: 51.59 PSNR:62.07 bytes:1032192