From 9cdd6a24ad90ff087d4d8c93d1b77b238d6d9aa9 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sun, 17 Mar 2002 16:31:38 +0000 Subject: [PATCH] hopefully better bitrate controll Originally committed as revision 334 to svn://svn.ffmpeg.org/ffmpeg/trunk --- ffmpeg.c | 64 +++++++++++++ libavcodec/avcodec.h | 7 ++ libavcodec/motion_est.c | 1 + libavcodec/mpegvideo.c | 197 +++++++++++++++++++++++++++++++++------- libavcodec/mpegvideo.h | 25 ++++- 5 files changed, 258 insertions(+), 36 deletions(-) diff --git a/ffmpeg.c b/ffmpeg.c index b885066ef4..23145ce930 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -75,7 +75,13 @@ static int frame_width = 160; static int frame_height = 128; static int frame_rate = 25 * FRAME_RATE_BASE; static int video_bit_rate = 200000; +static int video_bit_rate_tolerance = 200000; static int video_qscale = 0; +static int video_qmin = 3; +static int video_qmax = 15; +static int video_qdiff = 3; +static float video_qblur = 0.5; +static float video_qcomp = 0.5; static int video_disable = 0; static int video_codec_id = CODEC_ID_NONE; static int same_quality = 0; @@ -1149,6 +1155,11 @@ void opt_video_bitrate(const char *arg) video_bit_rate = atoi(arg) * 1000; } +void opt_video_bitrate_tolerance(const char *arg) +{ + video_bit_rate_tolerance = atoi(arg) * 1000; +} + void opt_frame_rate(const char *arg) { frame_rate = (int)(strtod(arg, 0) * FRAME_RATE_BASE); @@ -1182,6 +1193,45 @@ void opt_qscale(const char *arg) } } +void opt_qmin(const char *arg) +{ + video_qmin = atoi(arg); + if (video_qmin < 0 || + video_qmin > 31) { + fprintf(stderr, "qmin must be >= 1 and <= 31\n"); + exit(1); + } +} + +void opt_qmax(const char *arg) +{ + video_qmax = atoi(arg); + if (video_qmax < 0 || + video_qmax > 31) { + fprintf(stderr, "qmax must be >= 1 and <= 31\n"); + exit(1); + } +} + +void opt_qdiff(const char *arg) +{ + video_qdiff = atoi(arg); + if (video_qdiff < 0 || + video_qdiff > 31) { + fprintf(stderr, "qdiff must be >= 1 and <= 31\n"); + exit(1); + } +} + +void opt_qblur(const char *arg) +{ + video_qblur = atof(arg); +} + +void opt_qcomp(const char *arg) +{ + video_qcomp = atof(arg); +} void opt_audio_bitrate(const char *arg) { @@ -1611,6 +1661,7 @@ void opt_output_file(const char *filename) video_enc->codec_type = CODEC_TYPE_VIDEO; video_enc->bit_rate = video_bit_rate; + video_enc->bit_rate_tolerance = video_bit_rate_tolerance; video_enc->frame_rate = frame_rate; video_enc->width = frame_width; @@ -1623,6 +1674,13 @@ void opt_output_file(const char *filename) video_enc->flags |= CODEC_FLAG_QSCALE; video_enc->quality = video_qscale; } + + video_enc->qmin= video_qmin; + video_enc->qmax= video_qmax; + video_enc->max_qdiff= video_qdiff; + video_enc->qblur= video_qblur; + video_enc->qcompress= video_qcomp; + if (do_psnr) video_enc->get_psnr = 1; else @@ -1948,6 +2006,12 @@ const OptionDef options[] = { { "intra", OPT_BOOL | OPT_EXPERT, {(void*)&intra_only}, "use only intra frames"}, { "vn", OPT_BOOL, {(void*)&video_disable}, "disable video" }, { "qscale", HAS_ARG | OPT_EXPERT, {(void*)opt_qscale}, "use fixed video quantiser scale (VBR)", "q" }, + { "qmin", HAS_ARG | OPT_EXPERT, {(void*)opt_qmin}, "min video quantiser scale (VBR)", "q" }, + { "qmax", HAS_ARG | OPT_EXPERT, {(void*)opt_qmax}, "max video quantiser scale (VBR)", "q" }, + { "qdiff", HAS_ARG | OPT_EXPERT, {(void*)opt_qdiff}, "max difference between the quantiser scale (VBR)", "q" }, + { "qblur", HAS_ARG | OPT_EXPERT, {(void*)opt_qblur}, "video quantiser scale blur (VBR)", "blur" }, + { "qcomp", HAS_ARG | OPT_EXPERT, {(void*)opt_qcomp}, "video quantiser scale compression (VBR)", "compression" }, + { "bt", HAS_ARG, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance (in kbit/s)", "tolerance" }, #ifdef CONFIG_GRAB { "vd", HAS_ARG | OPT_EXPERT, {(void*)opt_video_device}, "set video device", "device" }, #endif diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index cc16cc876f..7245851648 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -72,6 +72,7 @@ extern int motion_estimation_method; typedef struct AVCodecContext { int bit_rate; + int bit_rate_tolerance; /* amount of +- bits (>0)*/ int flags; int sub_id; /* some codecs needs additionnal format info. It is stored there */ @@ -101,6 +102,12 @@ typedef struct AVCodecContext { a key frame (intra, or seekable) */ int quality; /* quality of the previous encoded frame (between 1 (good) and 31 (bad)) */ + float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/ + float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ + int qmin; /* min qscale */ + int qmax; /* max qscale */ + int max_qdiff; /* max qscale difference between frames */ + struct AVCodec *codec; void *priv_data; diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index 0cdb370f91..e9249692bf 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -460,6 +460,7 @@ int estimate_motion(MpegEncContext * s, varc = (varc >> 8) - (sum * sum); s->mb_var[s->mb_width * mb_y + mb_x] = varc; s->avg_mb_var += varc; + s->mc_mb_var += vard; #if 0 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 070058d8b8..15af25d7b7 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -276,6 +276,7 @@ int MPV_encode_init(AVCodecContext *avctx) int i; s->bit_rate = avctx->bit_rate; + s->bit_rate_tolerance = avctx->bit_rate_tolerance; s->frame_rate = avctx->frame_rate; s->width = avctx->width; s->height = avctx->height; @@ -284,6 +285,11 @@ int MPV_encode_init(AVCodecContext *avctx) s->rtp_payload_size = avctx->rtp_payload_size; if (avctx->rtp_callback) s->rtp_callback = avctx->rtp_callback; + s->qmin= avctx->qmin; + s->qmax= avctx->qmax; + s->max_qdiff= avctx->max_qdiff; + s->qcompress= avctx->qcompress; + s->qblur= avctx->qblur; s->avctx = avctx; if (s->gop_size <= 1) { @@ -520,7 +526,9 @@ int MPV_encode_picture(AVCodecContext *avctx, mjpeg_picture_trailer(s); flush_put_bits(&s->pb); - s->total_bits += (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->last_frame_bits= s->frame_bits; + s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8; + s->total_bits += s->frame_bits; avctx->quality = s->qscale; if (avctx->get_psnr) { @@ -1040,6 +1048,36 @@ static void encode_picture(MpegEncContext *s, int picture_number) int i, motion_x, motion_y; s->picture_number = picture_number; + + s->last_mc_mb_var = s->mc_mb_var; + /* Reset the average MB variance */ + s->avg_mb_var = 0; + s->mc_mb_var = 0; + + /* Estimate motion for every MB */ + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + int xy= mb_y * s->mb_width + mb_x; + s->mb_x = mb_x; + s->mb_y = mb_y; + + /* compute motion vector and macro block type (intra or non intra) */ + motion_x = 0; + motion_y = 0; + if (s->pict_type == P_TYPE) { + s->mb_intra = estimate_motion(s, mb_x, mb_y, + &motion_x, + &motion_y); + } else { + s->mb_intra = 1; + } + /* Store MB type and MV */ + s->mb_type[xy] = s->mb_intra; + s->mv_table[0][xy] = motion_x; + s->mv_table[1][xy] = motion_y; + } + } + if (!s->fixed_qscale) s->qscale = rate_estimate_qscale(s); @@ -1094,33 +1132,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) else s->gob_index = 4; } - - /* Reset the average MB variance */ - s->avg_mb_var = 0; - - /* Estimate motion for every MB */ - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - s->mb_x = mb_x; - s->mb_y = mb_y; - - /* compute motion vector and macro block type (intra or non intra) */ - motion_x = 0; - motion_y = 0; - if (s->pict_type == P_TYPE) { - s->mb_intra = estimate_motion(s, mb_x, mb_y, - &motion_x, - &motion_y); - } else { - s->mb_intra = 1; - } - /* Store MB type and MV */ - s->mb_type[mb_y * s->mb_width + mb_x] = s->mb_intra; - s->mv_table[0][mb_y * s->mb_width + mb_x] = motion_x; - s->mv_table[1][mb_y * s->mb_width + mb_x] = motion_y; - } - } - + s->avg_mb_var = s->avg_mb_var / s->mb_num; s->block_wrap[0]= @@ -1542,6 +1554,22 @@ static void dct_unquantize_h263_c(MpegEncContext *s, static void rate_control_init(MpegEncContext *s) { +#if 1 + emms_c(); + + //initial values, they dont really matter as they will be totally different within a few frames + s->i_pred.coeff= s->p_pred.coeff= 7.0; + s->i_pred.count= s->p_pred.count= 1.0; + + s->i_pred.decay= s->p_pred.decay= 0.4; + + // use more bits at the beginning, otherwise high motion at the begin will look like shit + s->qsum=100; + s->qcount=100; + + s->short_term_qsum=0.001; + s->short_term_qcount=0.001; +#else s->wanted_bits = 0; if (s->intra_only) { @@ -1552,24 +1580,122 @@ static void rate_control_init(MpegEncContext *s) (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1))); s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO); } - + #if defined(DEBUG) printf("I_frame_size=%d P_frame_size=%d\n", s->I_frame_bits, s->P_frame_bits); #endif +#endif } +static double predict(Predictor *p, double q, double var) +{ + return p->coeff*var / (q*p->count); +} + +static void update_predictor(Predictor *p, double q, double var, double size) +{ + double new_coeff= size*q / (var + 1); + if(var<1000) return; +/*{ +int pred= predict(p, q, var); +int error= abs(pred-size); +static double sum=0; +static int count=0; +if(count>5) sum+=error; +count++; +if(256*256*256*64%count==0){ + printf("%d %f %f\n", count, sum/count, p->coeff); +} +}*/ + p->count*= p->decay; + p->coeff*= p->decay; + p->count++; + p->coeff+= new_coeff; +} -/* - * This heuristic is rather poor, but at least we do not have to - * change the qscale at every macroblock. - */ static int rate_estimate_qscale(MpegEncContext *s) { +#if 1 + int qmin= s->qmin; + int qmax= s->qmax; + int rate_q=5; + float q; + int qscale; + float br_compensation; + double diff; + double short_term_q; + double long_term_q; + int last_qscale= s->qscale; + double fps; + INT64 wanted_bits; + emms_c(); + + fps= (double)s->frame_rate / FRAME_RATE_BASE; + wanted_bits= s->bit_rate*(double)s->picture_number/fps; + + + if(s->picture_number>2){ + /* update predictors */ + if(s->last_pict_type == I_TYPE){ + //FIXME + }else{ //P Frame +//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff); + update_predictor(&s->p_pred, s->qscale, s->last_mc_mb_var, s->frame_bits); + } + } + + if(s->pict_type == I_TYPE){ + //FIXME + rate_q= s->qsum/s->qcount; + }else{ //P Frame + int i; + int diff, best_diff=1000000000; + for(i=1; i<=31; i++){ + diff= predict(&s->p_pred, i, s->mc_mb_var) - (double)s->bit_rate/fps; + if(diff<0) diff= -diff; + if(diffshort_term_qsum*=s->qblur; + s->short_term_qcount*=s->qblur; + + s->short_term_qsum+= rate_q; + s->short_term_qcount++; + short_term_q= s->short_term_qsum/s->short_term_qcount; + + long_term_q= s->qsum/s->qcount*s->total_bits/wanted_bits; + +// q= (long_term_q - short_term_q)*s->qcompress + short_term_q; + q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q); + + diff= s->total_bits - wanted_bits; + br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance; + q/=br_compensation; + + qscale= (int)(q + 0.5); + if (qscaleqmax) qscale=qmax; + + if (qscalemax_qdiff) qscale=last_qscale-s->max_qdiff; + else if(qscale>last_qscale+s->max_qdiff) qscale=last_qscale+s->max_qdiff; + + s->qsum+= qscale; + s->qcount++; + + s->last_pict_type= s->pict_type; +//printf("q:%d diff:%d comp:%f rate_q:%d st_q:%d fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation, +// rate_q, (int)short_term_q, s->mc_mb_var, s->frame_bits); +//printf("%d %d\n", s->bit_rate, (int)fps); + return qscale; +#else INT64 diff, total_bits = s->total_bits; float q; - int qscale, qmin; - + int qscale; if (s->pict_type == I_TYPE) { s->wanted_bits += s->I_frame_bits; } else { @@ -1600,6 +1726,7 @@ static int rate_estimate_qscale(MpegEncContext *s) (int)diff, q); #endif return qscale; +#endif } AVCodec mpeg1video_encoder = { diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index da70ed612f..8bccde6f17 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -34,6 +34,12 @@ enum OutputFormat { #define QMAT_SHIFT_MMX 19 #define QMAT_SHIFT 25 +typedef struct Predictor{ + double coeff; + double count; + double decay; +} Predictor; + typedef struct MpegEncContext { struct AVCodecContext *avctx; /* the following parameters must be initialized before encoding */ @@ -42,6 +48,7 @@ typedef struct MpegEncContext { int frame_rate; /* number of frames per second */ int intra_only; /* if true, only intra pictures are generated */ int bit_rate; /* wanted bit rate */ + int bit_rate_tolerance; /* amount of +- bits (>0)*/ enum OutputFormat out_format; /* output format */ int h263_plus; /* h263 plus headers */ int h263_rv10; /* use RV10 variation for H263 */ @@ -49,6 +56,11 @@ typedef struct MpegEncContext { int h263_msmpeg4; /* generate MSMPEG4 compatible stream */ int h263_intel; /* use I263 intel h263 header */ int fixed_qscale; /* fixed qscale if non zero */ + float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0) */ + float qblur; /* amount of qscale smoothing over time (0.0-1.0) */ + int qmin; /* min qscale */ + int qmax; /* max qscale */ + int max_qdiff; /* max qscale difference between frames */ int encoding; /* true if we are encoding (vs decoding) */ /* the following fields are managed internally by the encoder */ @@ -85,6 +97,7 @@ typedef struct MpegEncContext { int qscale; int pict_type; int last_non_b_pict_type; /* used for mpeg4 gmc b-frames */ + int last_pict_type; /* used for bit rate stuff (needs that to update the right predictor) */ int frame_rate_index; /* motion compensation */ int unrestricted_mv; @@ -146,9 +159,19 @@ typedef struct MpegEncContext { int I_frame_bits; /* wanted number of bits per I frame */ int P_frame_bits; /* same for P frame */ int avg_mb_var; /* average MB variance for current frame */ + int mc_mb_var; /* motion compensated MB variance for current frame */ + int last_mc_mb_var; /* motion compensated MB variance for last frame */ INT64 wanted_bits; INT64 total_bits; - + int frame_bits; /* bits used for the current frame */ + int last_frame_bits; /* bits used for the last frame */ + Predictor i_pred; + Predictor p_pred; + double qsum; /* sum of qscales */ + double qcount; /* count of qscales */ + double short_term_qsum; /* sum of recent qscales */ + double short_term_qcount; /* count of recent qscales */ + /* H.263 specific */ int gob_number; int gob_index;