adaptive quantization (lumi/temporal & spatial complexity masking)

Originally committed as revision 964 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2002-09-23 14:56:11 +00:00
parent d46aba2642
commit c5d309f2d5
6 changed files with 267 additions and 31 deletions

View File

@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 4624
#define LIBAVCODEC_BUILD_STR "4624"
#define LIBAVCODEC_BUILD 4625
#define LIBAVCODEC_BUILD_STR "4625"
enum CodecID {
CODEC_ID_NONE,
@ -123,6 +123,8 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG,
#define CODEC_FLAG_DR1 0x8000 /* direct renderig type 1 (store internal frames in external buffers) */
#define CODEC_FLAG_NOT_TRUNCATED 0x00010000 /* input bitstream is not truncated, except before a startcode
allows the last part of a frame to be decoded earlier */
#define CODEC_FLAG_NORMALIZE_AQP 0x00020000 /* normalize adaptive quantization */
/* codec capabilities */
#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 /* decoder can use draw_horiz_band callback */
@ -174,8 +176,8 @@ typedef struct AVCodecContext {
* some codecs need / can use extra-data like huffman tables
* mjpeg: huffman tables
* rv10: additional flags
* encoding: set/allocated/freed by user.
* decoding: set/allocated/freed by lavc. (can be NULL)
* encoding: set/allocated/freed by lavc.
* decoding: set/allocated/freed by user.
*/
void *extradata;
int extradata_size;
@ -285,8 +287,13 @@ typedef struct AVCodecContext {
uint8_t *mbskip_table;
/* encoding parameters */
/**
* quality (between 1 (good) and 31 (bad))
* encoding: set by user if CODEC_FLAG_QSCALE is set otherwise set by lavc
* decoding: set by lavc
*/
int quality; /* quality of the previous encoded frame
(between 1 (good) and 31 (bad))
this is also used to set the quality in vbr mode
and the per frame quality in CODEC_FLAG_TYPE (second pass mode) */
float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/
@ -632,6 +639,34 @@ typedef struct AVCodecContext {
* decoding; set by lavc
*/
long long int pts;
/**
* luminance masking (0-> disabled)
* encoding: set by user
* decoding: unused
*/
float lumi_masking;
/**
* temporal complexity masking (0-> disabled)
* encoding: set by user
* decoding: unused
*/
float temporal_cplx_masking;
/**
* spatial complexity masking (0-> disabled)
* encoding: set by user
* decoding: unused
*/
float spatial_cplx_masking;
/**
* p block masking (0-> disabled)
* encoding: set by user
* decoding: unused
*/
float p_masking;
//FIXME this should be reordered after kabis API is finished ...
//TODO kill kabi

View File

@ -31,11 +31,17 @@
#include "h263data.h"
#include "mpeg4data.h"
//#undef NDEBUG
//#include <assert.h>
//rounded division & shift
#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
#if 1
#define PRINT_MB_TYPE(a) {}
//#define PRINT_MB_TYPE(a) printf(a)
#else
#define PRINT_MB_TYPE(a) printf(a)
#endif
#define INTRA_MCBPC_VLC_BITS 6
#define INTER_MCBPC_VLC_BITS 6
@ -298,6 +304,54 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
return score0 > score1 ? 1 : 0;
}
/**
 * Post-process the per-macroblock qscale table (s->qscale_table) so that it
 * only contains the transitions this encoder emits for MPEG-4.
 *
 * Steps, in order:
 *  1. Limit the increase of each entry relative to its predecessor (forward
 *     pass) and relative to its successor (backward pass) to at most +2.
 *  2. Demote every MB_TYPE_INTER4V macroblock whose qscale differs from the
 *     previous macroblock to MB_TYPE_INTER.
 *  3. For B frames only: force all entries to a common parity (the parity of
 *     the majority), so that every remaining step between neighbours is even,
 *     and demote MB_TYPE_DIRECT macroblocks whose qscale differs from the
 *     previous macroblock to MB_TYPE_BIDIR.
 *
 * @param s encoder context; reads mb_num and pict_type, modifies
 *          qscale_table[] and mb_type[] in place.
 */
void ff_clean_mpeg4_qscales(MpegEncContext *s){
    int i;

    /* more braindead iso mpeg mess */

    /* forward pass: an entry may exceed its predecessor by at most 2 */
    for(i=1; i<s->mb_num; i++){
        if(s->qscale_table[i] - s->qscale_table[i-1] >2)
            s->qscale_table[i]= s->qscale_table[i-1]+2;
    }
    /* backward pass: an entry may exceed its successor by at most 2 */
    for(i=s->mb_num-2; i>=0; i--){
        if(s->qscale_table[i] - s->qscale_table[i+1] >2)
            s->qscale_table[i]= s->qscale_table[i+1]+2;
    }

    /* a qscale change on a 4MV macroblock is not kept: fall back to 1 MV */
    for(i=1; i<s->mb_num; i++){
        if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_INTER4V)){
            s->mb_type[i]&= ~MB_TYPE_INTER4V;
            s->mb_type[i]|= MB_TYPE_INTER;
        }
    }

    if(s->pict_type== B_TYPE){
        int odd=0;
        /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than
           for the actual adaptive quantization */

        /* pick the parity used by the majority of macroblocks */
        for(i=0; i<s->mb_num; i++){
            odd += s->qscale_table[i]&1;
        }

        if(2*odd > s->mb_num) odd=1;
        else                  odd=0;

        for(i=0; i<s->mb_num; i++){
            /* keep the value inside the legal range before fixing parity */
            if(s->qscale_table[i] > 31)
                s->qscale_table[i]= 31;

            if((s->qscale_table[i]&1) != odd){
                /* Normally step up by one. At 31 that would leave the range
                   and a clamp back to 31 would restore the wrong parity, so
                   step down to 30 instead. */
                if(s->qscale_table[i] == 31)
                    s->qscale_table[i]= 30;
                else
                    s->qscale_table[i]++;
            }
        }

        /* a qscale change on a direct macroblock is not kept: use bidir */
        for(i=1; i<s->mb_num; i++){
            if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_DIRECT)){
                s->mb_type[i]&= ~MB_TYPE_DIRECT;
                s->mb_type[i]|= MB_TYPE_BIDIR;
            }
        }
    }
}
void mpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
@ -308,6 +362,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
PutBitContext * const dc_pb = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2 : &s->pb;
const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
const int dquant_code[5]= {1,0,9,2,3};
// printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
if (!s->mb_intra) {
@ -328,20 +383,27 @@ void mpeg4_encode_mb(MpegEncContext * s,
s->last_mv[1][0][0]=
s->last_mv[1][0][1]= 0;
}
assert(s->dquant>=-2 && s->dquant<=2);
assert((s->dquant&1)==0);
assert(mb_type>=0);
/* nothing to do if this MB was skiped in the next P Frame */
if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){
if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ //FIXME avoid DCT & ...
s->skip_count++;
s->mv[0][0][0]=
s->mv[0][0][1]=
s->mv[1][0][0]=
s->mv[1][0][1]= 0;
s->mv_dir= MV_DIR_FORWARD; //doesnt matter
s->qscale -= s->dquant;
return;
}
if ((cbp | motion_x | motion_y | mb_type) ==0) {
/* direct MB with MV={0,0} */
assert(s->dquant==0);
put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
if(interleaved_stats){
@ -356,8 +418,13 @@ void mpeg4_encode_mb(MpegEncContext * s,
put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :)
if(cbp) put_bits(&s->pb, 6, cbp);
if(cbp && mb_type)
put_bits(&s->pb, 1, 0); /* no q-scale change */
if(cbp && mb_type){
if(s->dquant)
put_bits(&s->pb, 2, (s->dquant>>2)+3);
else
put_bits(&s->pb, 1, 0);
}else
s->qscale -= s->dquant;
if(interleaved_stats){
bits= get_bit_count(&s->pb);
@ -421,7 +488,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
s->last_bits=bits;
}
}else{ /* s->pict_type==B_TYPE */
if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
/* check if the B frames can skip it too, as we must skip it if we skip here
why didnt they just compress the skip-mb bits instead of reusing them ?! */
if(s->max_b_frames>0){
@ -470,12 +537,16 @@ void mpeg4_encode_mb(MpegEncContext * s,
put_bits(&s->pb, 1, 0); /* mb coded */
if(s->mv_type==MV_TYPE_16X16){
cbpc = cbp & 3;
if(s->dquant) cbpc+= 8;
put_bits(&s->pb,
inter_MCBPC_bits[cbpc],
inter_MCBPC_code[cbpc]);
cbpy = cbp >> 2;
cbpy ^= 0xf;
put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
if(s->dquant)
put_bits(pb2, 2, dquant_code[s->dquant+2]);
if(interleaved_stats){
bits= get_bit_count(&s->pb);
@ -580,10 +651,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
cbpc = cbp & 3;
if (s->pict_type == I_TYPE) {
if(s->dquant) cbpc+=4;
put_bits(&s->pb,
intra_MCBPC_bits[cbpc],
intra_MCBPC_code[cbpc]);
} else {
if(s->dquant) cbpc+=8;
put_bits(&s->pb, 1, 0); /* mb coded */
put_bits(&s->pb,
inter_MCBPC_bits[cbpc + 4],
@ -592,6 +665,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
put_bits(pb2, 1, s->ac_pred);
cbpy = cbp >> 2;
put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
if(s->dquant)
put_bits(dc_pb, 2, dquant_code[s->dquant+2]);
if(interleaved_stats){
bits= get_bit_count(&s->pb);
@ -963,6 +1038,7 @@ static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
} else if (val >= l) {
val -= m;
}
assert(val>=-l && val<l);
if (val >= 0) {
sign = 0;

View File

@ -1142,6 +1142,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
s->mb_var [s->mb_width * mb_y + mb_x] = varc;
s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
s->mb_mean [s->mb_width * mb_y + mb_x] = (sum+7)>>4;
s->mb_var_sum += varc;
s->mc_mb_var_sum += vard;
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);

View File

@ -207,6 +207,7 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->mb_var , s->mb_num * sizeof(INT16))
CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16))
CHECKED_ALLOCZ(s->mb_mean , s->mb_num * sizeof(INT8))
/* Allocate MV tables */
CHECKED_ALLOCZ(s->p_mv_table , mv_table_size * 2 * sizeof(INT16))
@ -329,6 +330,7 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->mb_type);
av_freep(&s->mb_var);
av_freep(&s->mc_mb_var);
av_freep(&s->mb_mean);
av_freep(&s->p_mv_table);
av_freep(&s->b_forw_mv_table);
av_freep(&s->b_back_mv_table);
@ -442,6 +444,12 @@ int MPV_encode_init(AVCodecContext *avctx)
/* Fixed QSCALE */
s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
s->adaptive_quant= ( s->avctx->lumi_masking
|| s->avctx->temporal_cplx_masking
|| s->avctx->spatial_cplx_masking
|| s->avctx->p_masking)
&& !s->fixed_qscale;
switch(avctx->codec->id) {
case CODEC_ID_MPEG1VIDEO:
s->out_format = FMT_MPEG1;
@ -893,7 +901,8 @@ int MPV_encode_picture(AVCodecContext *avctx,
if (s->out_format == FMT_MJPEG)
mjpeg_picture_trailer(s);
avctx->quality = s->qscale;
if(!s->fixed_qscale)
avctx->quality = s->qscale;
if(s->flags&CODEC_FLAG_PASS1)
ff_write_pass1_stats(s);
@ -1753,6 +1762,24 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
}
#endif
for(i=0; i<6; i++) skip_dct[i]=0;
if(s->adaptive_quant){
s->dquant= s->qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
if(s->codec_id==CODEC_ID_MPEG4){
if (s->dquant> 2) s->dquant= 2;
else if(s->dquant<-2) s->dquant=-2;
if(!s->mb_intra){
assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
if(s->mv_dir&MV_DIRECT)
s->dquant=0;
}
}
s->qscale+= s->dquant;
s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
}
if (s->mb_intra) {
UINT8 *ptr;
@ -2080,6 +2107,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_set_mpeg4_time(s, s->picture_number);
s->scene_change_score=0;
s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
/* Estimate motion for every MB */
if(s->pict_type != I_TYPE){
@ -2125,7 +2154,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
sum= (sum+8)>>4;
varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
s->mb_var[s->mb_width * mb_y + mb_x] = varc;
s->mb_var [s->mb_width * mb_y + mb_x] = varc;
s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+7)>>4;
s->mb_var_sum += varc;
}
}
@ -2154,12 +2184,19 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
}
//printf("f_code %d ///\n", s->f_code);
// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
if (!s->fixed_qscale)
s->qscale = ff_rate_estimate_qscale(s);
if (s->fixed_qscale)
s->frame_qscale = s->avctx->quality;
else
s->frame_qscale = ff_rate_estimate_qscale(s);
if(s->adaptive_quant && s->codec_id==CODEC_ID_MPEG4)
ff_clean_mpeg4_qscales(s);
if(s->adaptive_quant)
s->qscale= s->qscale_table[0];
else
s->qscale= (int)(s->frame_qscale + 0.5);
if (s->out_format == FMT_MJPEG) {
/* for mjpeg, we do include qscale in the matrix */
s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];

View File

@ -53,7 +53,7 @@ typedef struct Predictor{
typedef struct RateControlEntry{
int pict_type;
int qscale;
float qscale;
int mv_bits;
int i_tex_bits;
int p_tex_bits;
@ -188,6 +188,9 @@ typedef struct MpegEncContext {
int input_pict_type; /* pict_type prior to reordering of frames */
int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
int qscale; /* QP */
float frame_qscale; /* qscale from the frame level rc */
int adaptive_quant; /* use adaptive quantization */
int dquant; /* qscale difference to prev qscale */
int pict_type; /* I_TYPE, P_TYPE, B_TYPE, ... */
int last_pict_type;
int last_non_b_pict_type; /* used for mpeg4 gmc b-frames & ratecontrol */
@ -241,13 +244,14 @@ typedef struct MpegEncContext {
int hurry_up; /* when set to 1 during decoding, b frames will be skiped
when set to 2 idct/dequant will be skipped too */
/* macroblock layer */
int mb_x, mb_y;
int mb_incr;
int mb_intra;
UINT16 *mb_var; /* Table for MB variances */
UINT16 *mc_mb_var; /* Table for motion compensated MB variances */
UINT8 *mb_mean; /* Table for MB luminance */
UINT8 *mb_type; /* Table for MB type */
#define MB_TYPE_INTRA 0x01
#define MB_TYPE_INTER 0x02
@ -582,6 +586,7 @@ void ff_mpeg4_stuffing(PutBitContext * pbc);
void ff_mpeg4_init_partitions(MpegEncContext *s);
void ff_mpeg4_merge_partitions(MpegEncContext *s);
extern inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
void ff_clean_mpeg4_qscales(MpegEncContext *s);
/* rv10.c */
void rv10_encode_picture_header(MpegEncContext *s, int picture_number);
@ -611,8 +616,7 @@ void mjpeg_picture_trailer(MpegEncContext *s);
/* rate control */
int ff_rate_control_init(MpegEncContext *s);
int ff_rate_estimate_qscale(MpegEncContext *s);
int ff_rate_estimate_qscale_pass2(MpegEncContext *s);
float ff_rate_estimate_qscale(MpegEncContext *s);
void ff_write_pass1_stats(MpegEncContext *s);
void ff_rate_control_uninit(MpegEncContext *s);
double ff_eval(char *s, double *const_value, char **const_name,

View File

@ -38,9 +38,9 @@ static int init_pass2(MpegEncContext *s);
static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num);
void ff_write_pass1_stats(MpegEncContext *s){
sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n",
sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n",
s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type,
s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits,
s->frame_qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits,
s->f_code, s->b_code, s->mc_mb_var_sum, s->mb_var_sum, s->i_count);
}
@ -105,7 +105,7 @@ int ff_rate_control_init(MpegEncContext *s)
assert(picture_number < rcc->num_entries);
rce= &rcc->entry[picture_number];
e+=sscanf(p, " in:%*d out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d",
e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d",
&rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits,
&rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count);
if(e!=12){
@ -433,7 +433,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q,
q= exp(q);
}
return q;
}
@ -462,10 +462,89 @@ static void update_predictor(Predictor *p, double q, double var, double size)
p->coeff+= new_coeff;
}
int ff_rate_estimate_qscale(MpegEncContext *s)
/**
 * Derive a per-macroblock quantizer from the frame-level quantizer.
 *
 * For every macroblock a weighting factor is computed from the user supplied
 * masking strengths (s->avctx->lumi_masking, temporal_cplx_masking,
 * spatial_cplx_masking, p_masking) and the macroblock statistics
 * (s->mb_var, s->mc_mb_var, s->mb_mean, s->mb_type). The macroblock
 * quantizer is q divided by that factor, optionally renormalized when
 * CODEC_FLAG_NORMALIZE_AQP is set, rounded, clipped to [qmin, qmax] and
 * stored in s->qscale_table[].
 *
 * @param s encoder context; s->qscale_table[0 .. mb_num-1] is overwritten
 * @param q frame-level quantizer chosen by the rate control
 */
static void adaptive_quantization(MpegEncContext *s, double q){
    int i;
    const float lumi_masking= s->avctx->lumi_masking / (128.0*128.0);
    const float temp_cplx_masking= s->avctx->temporal_cplx_masking;
    const float spatial_cplx_masking = s->avctx->spatial_cplx_masking;
    const float p_masking = s->avctx->p_masking;
    float bits_sum= 0.0;
    float cplx_sum= 0.0;
    float cplx_tab[s->mb_num];
    float bits_tab[s->mb_num];
    const int qmin= 2; //s->avctx->mb_qmin;
    const int qmax= 31; //s->avctx->mb_qmax;

    for(i=0; i<s->mb_num; i++){
        float temp_cplx= sqrt(s->mc_mb_var[i]);
        float spat_cplx= sqrt(s->mb_var[i]);
        const int lumi= s->mb_mean[i];
        float bits, cplx, factor;

        /* lower bound on both complexities; also keeps them strictly
           positive for the pow() calls and the division further down */
        if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune
        if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune

        if((s->mb_type[i]&MB_TYPE_INTRA)){//FIXME hq mode
            cplx= spat_cplx;
            factor= 1.0 + p_masking;
        }else{
            cplx= temp_cplx;
            factor= pow(temp_cplx, - temp_cplx_masking);
        }
        factor*=pow(spat_cplx, - spatial_cplx_masking);
        /* weight falls off quadratically with the distance of the mean
           luminance from 128 */
        factor*= (1.0 - (lumi-128)*(lumi-128)*lumi_masking);

        /* keep the factor positive so that q/factor stays finite */
        if(factor<0.00001) factor= 0.00001;

        bits= cplx*factor;
        cplx_sum+= cplx;
        bits_sum+= bits;
        cplx_tab[i]= cplx;
        bits_tab[i]= bits;
    }

    /* handle qmin/qmax clipping */
    if(s->flags&CODEC_FLAG_NORMALIZE_AQP){
        for(i=0; i<s->mb_num; i++){
            float newq= q*cplx_tab[i]/bits_tab[i];
            newq*= bits_sum/cplx_sum;

            /* macroblocks that will be clipped are taken out of the sums */
            if     (newq > qmax){
                bits_sum -= bits_tab[i];
                cplx_sum -= cplx_tab[i]*q/qmax;
            }
            else if(newq < qmin){
                bits_sum -= bits_tab[i];
                cplx_sum -= cplx_tab[i]*q/qmin;
            }
        }
        /* The subtractions above can bring either sum to zero or below
           (e.g. when every macroblock is clipped). Keep both strictly
           positive so that bits_sum/cplx_sum below is a finite, positive
           ratio and the float -> int conversion stays well defined. */
        if(bits_sum < 0.001) bits_sum= 0.001;
        if(cplx_sum < 0.001) cplx_sum= 0.001;
    }

    for(i=0; i<s->mb_num; i++){
        float newq= q*cplx_tab[i]/bits_tab[i];
        int intq;

        if(s->flags&CODEC_FLAG_NORMALIZE_AQP){
            newq*= bits_sum/cplx_sum;
        }

        /* reuse the previous macroblock's quantizer when the new value is
           within 0.75 of it, otherwise round to nearest */
        if(i && ABS(s->qscale_table[i-1] - newq)<0.75)
            intq= s->qscale_table[i-1];
        else
            intq= (int)(newq + 0.5);

        if     (intq > qmax) intq= qmax;
        else if(intq < qmin) intq= qmin;
        s->qscale_table[i]= intq;
    }
}
float ff_rate_estimate_qscale(MpegEncContext *s)
{
float q;
int qscale, qmin, qmax;
int qmin, qmax;
float br_compensation;
double diff;
double short_term_q;
@ -581,16 +660,20 @@ int ff_rate_estimate_qscale(MpegEncContext *s)
// printf("%f %d %d %d\n", q, picture_number, (int)wanted_bits, (int)s->total_bits);
//printf("%f %f %f\n", q, br_compensation, short_term_q);
qscale= (int)(q + 0.5);
//printf("q:%d diff:%d comp:%f st_q:%f last_size:%d type:%d\n", qscale, (int)diff, br_compensation,
// short_term_q, s->frame_bits, pict_type);
//printf("%d %d\n", s->bit_rate, (int)fps);
rcc->last_qscale= qscale;
if(s->adaptive_quant)
adaptive_quantization(s, q);
else
q= (int)(q + 0.5);
rcc->last_qscale= q;
rcc->last_mc_mb_var_sum= s->mc_mb_var_sum;
rcc->last_mb_var_sum= s->mb_var_sum;
return qscale;
return q;
}
//----------------------------------------------