new motion estimation (epzs) not complete yet but already pretty good :)

unlimited mv search range
minor bugfix in the mpeg4 header parser
reset picture in gop counter if scene change is detected

Originally committed as revision 344 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2002-03-22 02:21:17 +00:00
parent daa5764137
commit 45870f5718
8 changed files with 363 additions and 83 deletions

View File

@ -57,6 +57,8 @@ extern int motion_estimation_method;
#define ME_FULL 1
#define ME_LOG 2
#define ME_PHODS 3
#define ME_EPZS 4
#define ME_X1 5
/* encoding support */

View File

@ -128,6 +128,7 @@ void init_get_bits(GetBitContext *s,
s->bit_cnt += 8;
}
#endif
s->size= buffer_size;
}
#ifndef ALT_BITSTREAM_READER

View File

@ -197,6 +197,7 @@ typedef struct GetBitContext {
int bit_cnt;
UINT8 *buf, *buf_ptr, *buf_end;
#endif
int size;
} GetBitContext;
typedef struct VLC {
@ -787,6 +788,24 @@ static inline int av_log2(unsigned int v)
return n;
}
/* Return the median (middle value) of a, b and c. */
static inline int mid_pred(int a, int b, int c)
{
    int lo = a;
    int hi = a;

    /* fold b into the running [lo, hi] range */
    if (b < lo) {
        lo = b;
    } else {
        hi = b;
    }

    /* fold c in; a value already inside the range changes nothing */
    if (c < lo) {
        lo = c;
    } else if (c > hi) {
        hi = c;
    }

    /* dropping the smallest and largest from the sum leaves the median */
    return a + b + c - lo - hi;
}
/* memory */
void *av_mallocz(int size);

View File

@ -17,6 +17,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* ac prediction encoding by Michael Niedermayer <michaelni@gmx.at>
*/
#include "common.h"
#include "dsputil.h"
@ -45,9 +47,11 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir);
extern UINT32 inverse[256];
/* Number of bits needed to code a motion vector difference, indexed as
 * [f_code][mv + MAX_MV]. f_code runs from 1 to MAX_FCODE inclusive, so the
 * table needs MAX_FCODE + 1 rows (row 0 is unused); with only MAX_FCODE rows
 * the row for f_code == MAX_FCODE lies past the end of the array. */
static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static UINT8 fcode_tab[MAX_MV*2+1];
int h263_get_picture_format(int width, int height)
{
int format;
@ -524,24 +528,6 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
ac_val1[8 + i] = block[block_permute_op(i)];
}
/* Return the median (middle value) of a, b and c. */
static inline int mid_pred(int a, int b, int c)
{
    int lo = a;
    int hi = a;

    /* fold b into the running [lo, hi] range */
    if (b < lo) {
        lo = b;
    } else {
        hi = b;
    }

    /* fold c in; a value already inside the range changes nothing */
    if (c < lo) {
        lo = c;
    } else if (c > hi) {
        hi = c;
    }

    /* dropping the smallest and largest from the sum leaves the median */
    return a + b + c - lo - hi;
}
INT16 *h263_pred_motion(MpegEncContext * s, int block,
int *px, int *py)
{
@ -648,7 +634,46 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
}
}
void h263_encode_init_vlc(MpegEncContext *s)
/**
 * Fill the file-static mv_penalty and fcode_tab lookup tables.
 *
 * mv_penalty[f_code][mv + MAX_MV] receives a code length taken from mvtab
 * for the motion vector difference mv under the given f_code, plus
 * 1 + (f_code - 1) further bits (presumably the sign bit and the residual
 * bits -- confirm against the MV encoder).
 * fcode_tab[mv + MAX_MV] receives the smallest f_code for which
 * -(16 << f_code) <= mv < (16 << f_code).
 *
 * @param s encoder context; not read here, kept so the caller is unchanged
 */
static void init_mv_penalty_and_fcode(MpegEncContext *s)
{
    /* number of rows actually declared for mv_penalty, so that the row loop
     * can never write past the end of the table */
    const int penalty_rows = (int)(sizeof(mv_penalty) / sizeof(mv_penalty[0]));
    int f_code;
    int mv;

    (void)s;

    for (f_code = 1; f_code <= MAX_FCODE && f_code < penalty_rows; f_code++) {
        /* The bit count must follow the f_code of the row being filled.
         * Using s->f_code here would give every row the same contents, and
         * that field may still be 0 when the tables are built, which would
         * make the shift count below negative. */
        const int bit_size = f_code - 1;

        for (mv = -MAX_MV; mv <= MAX_MV; mv++) {
            int len;

            if (mv == 0) {
                len = mvtab[0][1];
            } else {
                int val = mv < 0 ? -mv : mv;
                int code;

                val--;
                code = (val >> bit_size) + 1;
                if (code < 33) {
                    len = mvtab[code][1] + 1 + bit_size;
                } else {
                    /* codes of 33 and above are charged the length of
                     * entry 32 plus one extra bit */
                    len = mvtab[32][1] + 2 + bit_size;
                }
            }

            mv_penalty[f_code][mv + MAX_MV] = len;
        }
    }

    /* Walk f_code downwards: each smaller f_code overwrites the part of the
     * range it can represent, leaving the minimum f_code per vector. */
    for (f_code = MAX_FCODE; f_code > 0; f_code--) {
        for (mv = -(16 << f_code); mv < (16 << f_code); mv++) {
            fcode_tab[mv + MAX_MV] = f_code;
        }
    }
}
void h263_encode_init(MpegEncContext *s)
{
static int done = 0;
@ -656,7 +681,13 @@ void h263_encode_init_vlc(MpegEncContext *s)
done = 1;
init_rl(&rl_inter);
init_rl(&rl_intra);
init_mv_penalty_and_fcode(s);
}
s->mv_penalty= mv_penalty;
// use fcodes >1 only for mpeg4 FIXME
if(!s->h263_msmpeg4 && s->h263_pred) s->fcode_tab= fcode_tab;
}
static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
@ -2094,9 +2125,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
break;
}
state = ((state << 8) | v) & 0xffffff;
/* XXX: really detect end of frame */
if (state == 0){
printf("illegal zero code found\n");
if( get_bits_count(&s->gb) > s->gb.size*8){
printf("no VOP startcode found\n");
return -1;
}
}
@ -2152,6 +2182,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
if(width && height){ /* they should be non zero but who knows ... */
s->width = width;
s->height = height;
// printf("%d %d\n", width, height);
}
}

View File

@ -16,6 +16,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
*/
#include <stdlib.h>
#include <stdio.h>
@ -25,7 +27,8 @@
static void halfpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax);
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y);
/* config it to test motion vector encoding (send random vectors) */
//#define CONFIG_TEST_MV_ENCODE
@ -328,67 +331,209 @@ static int phods_motion_search(MpegEncContext * s,
return dminy;
}
/* Cost below which the search routines in this file accept the current
 * candidate immediately and stop searching. */
#define Z_THRESHOLD 256
/*
 * Evaluate the full-pel candidate (x,y), given relative to the current block
 * position. The cost is the value returned by pix_abs16x16() for the
 * displaced reference block plus the mv_penalty entries for the x and y
 * differences to the predictor, scaled by quant. If the cost is lower than
 * dmin, the candidate is stored in best[] and dmin is updated.
 *
 * Expands to bare statements (no do { } while (0) wrapper) and relies on
 * these names being in scope at the call site: d, dmin, best, new_pic,
 * old_pic, pic_stride, mv_penalty, shift, pred_x, pred_y, quant.
 * x and y are expanded several times, so pass side-effect-free expressions.
 * No range check is done here; the caller must make sure (x,y) is valid.
 */
#define CHECK_MV(x,y)\
d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride, 16);\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
if(d<dmin){\
best[0]=x;\
best[1]=y;\
dmin=d;\
}
/*
 * Same cost evaluation as CHECK_MV, wrapped in a brace block, and
 * additionally records new_dir in next_dir when the candidate (x,y) becomes
 * the new best. Used by the diamond search to remember which way it moved.
 *
 * Relies on these names being in scope at the call site: d, dmin, best,
 * next_dir, new_pic, old_pic, pic_stride, mv_penalty, shift, pred_x, pred_y,
 * quant. x and y are expanded several times, so pass side-effect-free
 * expressions.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride, 16);\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
if(d<dmin){\
best[0]=x;\
best[1]=y;\
dmin=d;\
next_dir= new_dir;\
}\
}
/*
 * Iterative 4-neighbour ("small diamond") refinement around best[].
 *
 * Each pass tests the left, upper, right and lower neighbour of the current
 * best position that lies inside [xmin,xmax] x [ymin,ymax], except the one
 * the previous pass moved away from. The loop ends when a pass finds no
 * improvement. best[] is updated in place; the final cost is returned.
 *
 * Direction codes: 0 = x-1, 1 = y-1, 2 = x+1, 3 = y+1, -1 = no move.
 * The names d, dmin, best and next_dir are used by CHECK_MV_DIR and must
 * keep these spellings.
 */
static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
                                       int xmin, int ymin, int xmax, int ymax, int shift)
{
    int next_dir = -1;

    do {
        int d;
        const int came_from = next_dir;   /* direction of the previous move */
        const int cx = best[0];           /* centre of this pass */
        const int cy = best[1];

        next_dir = -1;

        /* skip the neighbour opposite to the last move: it was the previous centre */
        if (came_from != 2 && cx - 1 >= xmin)
            CHECK_MV_DIR(cx - 1, cy    , 0)
        if (came_from != 3 && cy - 1 >= ymin)
            CHECK_MV_DIR(cx    , cy - 1, 1)
        if (came_from != 0 && cx + 1 <= xmax)
            CHECK_MV_DIR(cx + 1, cy    , 2)
        if (came_from != 1 && cy + 1 <= ymax)
            CHECK_MV_DIR(cx    , cy + 1, 3)
    } while (next_dir != -1);

    return dmin;
}
/*
 * Predictive full-pel motion search for the current 16x16 macroblock.
 *
 * Tests the zero vector, then a predicted vector built from neighbouring
 * entries of s->motion_val (plus, outside the first line, the neighbours
 * themselves and the co-located entry), and finally refines the best
 * candidate with small_diamond_search(). The search exits early whenever
 * the best cost drops below Z_THRESHOLD.
 *
 * mx_ptr, my_ptr: receive the best full-pel vector, relative to the
 *                 macroblock position.
 * px_ptr, py_ptr: receive the predictor (pred_x, pred_y) used for the
 *                 mv_penalty term; NOT written when the zero vector is
 *                 accepted by the first early exit.
 * xmin..ymax:     search window; converted below to macroblock-relative
 *                 coordinates by subtracting the macroblock origin.
 * Returns the cost (dmin) of the vector stored in *mx_ptr / *my_ptr.
 */
static int epzs_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr,
int *px_ptr, int *py_ptr,
int xmin, int ymin, int xmax, int ymax)
{
INT16 P_left[2], P_top[2], P_topright[2], P_last[2];
/* only off[0] is used in this function */
static const int off[4]= {2, 1, 1, -1};
int best[2]={0, 0};
int d, dmin;
UINT8 *new_pic, *old_pic;
const int pic_stride= s->linesize;
const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
const int mot_stride = s->block_wrap[0];
const int mot_xy = s->block_index[0];
/* offset by MAX_MV so the row can be indexed with signed differences */
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
int quant= s->qscale; // qscale of the prev frame
int pred_x, pred_y;
/* motion_val entries are stored with `shift` fractional bits */
const int shift= 1+s->quarter_sample;
new_pic = s->new_picture[0] + pic_xy;
old_pic = s->last_picture[0] + pic_xy;
/* make the search window relative to the macroblock origin */
xmin-=s->mb_x*16;
xmax-=s->mb_x*16;
ymin-=s->mb_y*16;
ymax-=s->mb_y*16;
/* cost of the zero vector, without any mv_penalty term */
dmin = pix_abs16x16(new_pic, old_pic, pic_stride, 16);
if(dmin<Z_THRESHOLD){
*mx_ptr= 0;
*my_ptr= 0;
//printf("Z");
return dmin;
}
P_last[0] = s->motion_val[mot_xy ][0];
P_last[1] = s->motion_val[mot_xy ][1];
P_left[0] = s->motion_val[mot_xy - 1][0];
P_left[1] = s->motion_val[mot_xy - 1][1];
/* NOTE(review): only this one bound of P_left is clamped; the other three
   are not -- confirm that out-of-window candidates cannot occur */
if(P_left[0] > (xmax<<shift)) P_left[0]= (xmax<<shift);
/* special case for first line */
if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
/* no row above: the left neighbour alone is the predictor */
*px_ptr= pred_x = P_left[0];
*py_ptr= pred_y = P_left[1];
CHECK_MV(pred_x>>shift, pred_y>>shift)
/* dmin was >= Z_THRESHOLD above, so this can only be true if the
   predictor candidate was just accepted */
if(dmin<Z_THRESHOLD){
*mx_ptr= pred_x>>shift;
*my_ptr= pred_y>>shift;
//printf("M");
return dmin;
}
} else {
P_top [0] = s->motion_val[mot_xy - mot_stride ][0];
P_top [1] = s->motion_val[mot_xy - mot_stride ][1];
P_topright[0] = s->motion_val[mot_xy - mot_stride + off[0] ][0];
P_topright[1] = s->motion_val[mot_xy - mot_stride + off[0] ][1];
/* NOTE(review): as for P_left, only some of the bounds are clamped */
if(P_top [1] > (ymax<<shift)) P_top [1]= (ymax<<shift);
if(P_topright[0] < (xmin<<shift)) P_topright[0]= (xmin<<shift);
if(P_topright[1] > (ymax<<shift)) P_topright[1]= (ymax<<shift);
/* predictor = component-wise median of left, top and top-right */
*px_ptr= pred_x = mid_pred(P_left[0], P_top[0], P_topright[0]);
*py_ptr= pred_y = mid_pred(P_left[1], P_top[1], P_topright[1]);
CHECK_MV(pred_x>>shift, pred_y>>shift)
if(dmin<Z_THRESHOLD){
*mx_ptr= pred_x>>shift;
*my_ptr= pred_y>>shift;
//printf("M");
return dmin;
}
/* further candidates: the neighbours themselves and the entry currently
   stored at this block's own position */
CHECK_MV(P_left [0]>>shift, P_left [1]>>shift)
CHECK_MV(P_top [0]>>shift, P_top [1]>>shift)
CHECK_MV(P_topright[0]>>shift, P_topright[1]>>shift)
CHECK_MV(P_last [0]>>shift, P_last [1]>>shift)
}
/* local refinement around the best candidate found so far */
dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
*mx_ptr= best[0];
*my_ptr= best[1];
// printf("%d %d %d \n", best[0], best[1], dmin);
return dmin;
}
/*
 * Evaluate one half-pel candidate at offset (x,y) from the full-pel vector
 * (mx1,my1). The comparison function is selected by token pasting:
 * pix_abs16x16_<suffix>, called on ptr advanced by (x)>>1. The cost adds the
 * mv_penalty entries at pen_x + x and pen_y + y, scaled by quant. If the
 * cost is lower than dminh, dminh, mx and my are updated.
 *
 * Expands to bare statements and relies on these names being in scope at
 * the call site: d, dminh, pix, ptr, s, mv_penalty, pen_x, pen_y, quant,
 * mx, my, mx1, my1. x and y are not parenthesised in every use, so pass
 * plain constants such as -1, 0 or +1.
 */
#define CHECK_HALF_MV(suffix, x, y) \
d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize, 16);\
d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
if(d<dminh){\
dminh= d;\
mx= mx1 + x;\
my= my1 + y;\
}
/* The idea would be to make half pel ME after Inter/Intra decision to
save time. */
static void halfpel_motion_search(MpegEncContext * s,
static inline void halfpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax)
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y)
{
UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
const int quant= s->qscale;
int pen_x, pen_y;
int mx, my, mx1, my1, d, xx, yy, dminh;
UINT8 *pix;
UINT8 *pix, *ptr;
mx = *mx_ptr << 1;
my = *my_ptr << 1;
mx = *mx_ptr;
my = *my_ptr;
ptr = s->last_picture[0] + (my * s->linesize) + mx;
xx = 16 * s->mb_x;
yy = 16 * s->mb_y;
pix = s->new_picture[0] + (yy * s->linesize) + xx;
dminh = dmin;
/* Half pixel search */
mx1 = mx;
my1 = my;
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
pix = s->new_picture[0] + (yy * s->linesize) + xx;
if ((mx > (xmin << 1)) && mx < (xmax << 1) &&
(my > (ymin << 1)) && my < (ymax << 1)) {
int dx, dy, px, py;
UINT8 *ptr;
for (dy = -1; dy <= 1; dy++) {
for (dx = -1; dx <= 1; dx++) {
if (dx != 0 || dy != 0) {
px = mx1 + dx;
py = my1 + dy;
ptr = s->last_picture[0] + ((py >> 1) * s->linesize) + (px >> 1);
switch (((py & 1) << 1) | (px & 1)) {
default:
case 0:
d = pix_abs16x16(pix, ptr, s->linesize, 16);
break;
case 1:
d = pix_abs16x16_x2(pix, ptr, s->linesize, 16);
break;
case 2:
d = pix_abs16x16_y2(pix, ptr, s->linesize, 16);
break;
case 3:
d = pix_abs16x16_xy2(pix, ptr, s->linesize, 16);
break;
}
if (d < dminh) {
dminh = d;
mx = px;
my = py;
}
}
}
mx= mx1= 2*(mx - xx);
my= my1= 2*(my - yy);
if(dmin < Z_THRESHOLD && mx==0 && my==0){
*mx_ptr = 0;
*my_ptr = 0;
return;
}
pen_x= pred_x + mx;
pen_y= pred_y + my;
ptr-= s->linesize;
CHECK_HALF_MV(xy2, -1, -1)
CHECK_HALF_MV(y2 , 0, -1)
CHECK_HALF_MV(xy2, +1, -1)
ptr+= s->linesize;
CHECK_HALF_MV(x2 , -1, 0)
CHECK_HALF_MV(x2 , +1, 0)
CHECK_HALF_MV(xy2, -1, +1)
CHECK_HALF_MV(y2 , 0, +1)
CHECK_HALF_MV(xy2, +1, +1)
}else{
mx= 2*(mx - xx);
my= 2*(my - yy);
}
*mx_ptr = mx - (xx << 1);
*my_ptr = my - (yy << 1);
//fprintf(stderr,"half - MX: %d\tMY: %d\n",*mx_ptr ,*my_ptr);
*mx_ptr = mx;
*my_ptr = my;
}
#ifndef CONFIG_TEST_MV_ENCODE
@ -400,6 +545,7 @@ int estimate_motion(MpegEncContext * s,
UINT8 *pix, *ppix;
int sum, varc, vard, mx, my, range, dmin, xx, yy;
int xmin, ymin, xmax, ymax;
int pred_x=0, pred_y=0;
range = 8 * (1 << (s->f_code - 1));
/* XXX: temporary kludge to avoid overflow for msmpeg4 */
@ -426,7 +572,6 @@ int estimate_motion(MpegEncContext * s,
xmax = s->mb_width*16 - 16;
ymax = s->mb_height*16 - 16;
}
switch(s->full_search) {
case ME_ZERO:
default:
@ -442,8 +587,13 @@ int estimate_motion(MpegEncContext * s,
case ME_PHODS:
dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax);
break;
case ME_X1: // just reserving some space for experiments ...
case ME_EPZS:
dmin = epzs_motion_search(s, &mx, &my, &pred_x, &pred_y, xmin, ymin, xmax, ymax);
mx+= s->mb_x*16;
my+= s->mb_y*16;
break;
}
emms_c();
/* intra / predictive decision */
xx = mb_x * 16;
@ -470,7 +620,7 @@ int estimate_motion(MpegEncContext * s,
#endif
if (vard <= 64 || vard < varc) {
if (s->full_search != ME_ZERO) {
halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax);
halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y);
} else {
mx -= 16 * s->mb_x;
my -= 16 * s->mb_y;

View File

@ -66,7 +66,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
int n;
UINT64 time_code;
if ((s->picture_number % s->gop_size) == 0) {
if (s->picture_in_gop_number == 0) {
/* mpeg1 header repeated every gop */
put_header(s, SEQ_START_CODE);

View File

@ -67,6 +67,9 @@ static UINT8 h263_chroma_roundtab[16] = {
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
static UINT16 default_mv_penalty[MAX_FCODE][MAX_MV*2+1];
static UINT8 default_fcode_tab[MAX_MV*2+1];
/* default motion estimation */
int motion_estimation_method = ME_LOG;
@ -356,8 +359,24 @@ int MPV_encode_init(AVCodecContext *avctx)
return -1;
}
{ /* set up some save defaults, some codecs might override them later */
static int done=0;
if(!done){
int i;
done=1;
memset(default_mv_penalty, 0, sizeof(UINT16)*MAX_FCODE*(2*MAX_MV+1));
memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));
for(i=-16; i<16; i++){
default_fcode_tab[i + MAX_MV]= 1;
}
}
}
s->mv_penalty= default_mv_penalty;
s->fcode_tab= default_fcode_tab;
if (s->out_format == FMT_H263)
h263_encode_init_vlc(s);
h263_encode_init(s);
s->encoding = 1;
@ -375,6 +394,7 @@ int MPV_encode_init(AVCodecContext *avctx)
rate_control_init(s);
s->picture_number = 0;
s->picture_in_gop_number = 0;
s->fake_picture_number = 0;
/* motion detector init */
s->f_code = 1;
@ -480,9 +500,10 @@ int MPV_encode_picture(AVCodecContext *avctx,
if (!s->intra_only) {
/* first picture of GOP is intra */
if ((s->picture_number % s->gop_size) == 0)
if (s->picture_in_gop_number >= s->gop_size){
s->picture_in_gop_number=0;
s->pict_type = I_TYPE;
else
}else
s->pict_type = P_TYPE;
} else {
s->pict_type = I_TYPE;
@ -521,6 +542,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
MPV_frame_end(s);
s->picture_number++;
s->picture_in_gop_number++;
if (s->out_format == FMT_MJPEG)
mjpeg_picture_trailer(s);
@ -1077,17 +1099,66 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mv_table[1][xy] = motion_y;
}
}
emms_c();
if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE){ //FIXME subtract MV bits
int i;
s->pict_type= I_TYPE;
for(i=0; i<s->mb_height*s->mb_width; i++){
s->mb_type[i] = I_TYPE;
s->picture_in_gop_number=0;
for(i=0; i<s->mb_num; i++){
s->mb_type[i] = 1;
s->mv_table[0][i] = 0;
s->mv_table[1][i] = 0;
}
}
/* find best f_code */
if(s->pict_type==P_TYPE){
int mv_num[8];
int i;
int loose=0;
UINT8 * fcode_tab= s->fcode_tab;
for(i=0; i<8; i++) mv_num[i]=0;
for(i=0; i<s->mb_num; i++){
if(s->mb_type[i] == 0){
mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++;
mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++;
//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i);
}
//else printf("I");
}
for(i=MAX_FCODE; i>1; i--){
loose+= mv_num[i];
if(loose > 4) break;
}
s->f_code= i;
}else{
s->f_code= 1;
}
//printf("f_code %d ///\n", s->f_code);
/* convert MBs with too long MVs to I-Blocks */
if(s->pict_type==P_TYPE){
int i;
const int f_code= s->f_code;
UINT8 * fcode_tab= s->fcode_tab;
for(i=0; i<s->mb_num; i++){
if(s->mb_type[i] == 0){
if( fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code
|| fcode_tab[s->mv_table[0][i] + MAX_MV] == 0
|| fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code
|| fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){
s->mb_type[i] = 1;
s->mv_table[0][i] = 0;
s->mv_table[1][i] = 0;
}
}
}
}
// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
if (!s->fixed_qscale)

View File

@ -34,6 +34,9 @@ enum OutputFormat {
#define QMAT_SHIFT_MMX 19
#define QMAT_SHIFT 25
#define MAX_FCODE 7
#define MAX_MV 2048
typedef struct Predictor{
double coeff;
double count;
@ -71,7 +74,8 @@ typedef struct MpegEncContext {
int context_initialized;
int picture_number;
int fake_picture_number; /* picture number at the bitstream frame rate */
int gop_picture_number; /* index of the first picture of a GOP */
int gop_picture_number; /* index of the first picture of a GOP based on fake_pic_num & mpeg1 specific */
int picture_in_gop_number; /* 0-> first pic in gop, ... */
int mb_width, mb_height;
int mb_num; /* number of MBs of a picture */
int linesize; /* line size, in bytes, may be different from width */
@ -114,7 +118,7 @@ typedef struct MpegEncContext {
#define MV_DIRECT 4 // bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4)
int mv_type;
#define MV_TYPE_16X16 0 /* 1 vector for the whole mb */
#define MV_TYPE_8X8 1 /* 4 vectors (h263) */
#define MV_TYPE_8X8 1 /* 4 vectors (h263, mpeg4 4MV) */
#define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */
#define MV_TYPE_FIELD 3 /* 2 vectors, one per field */
#define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */
@ -126,6 +130,8 @@ typedef struct MpegEncContext {
int mv[2][4][2];
int field_select[2][2];
int last_mv[2][2][2];
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
UINT8 *fcode_tab; /* smallest fcode needed for each MV */
int has_b_frames;
int no_rounding; /* apply no rounding to motion estimation (MPEG4) */
@ -350,7 +356,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
int dir);
void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
void h263_encode_init_vlc(MpegEncContext *s);
void h263_encode_init(MpegEncContext *s);
void h263_decode_init_vlc(MpegEncContext *s);
int h263_decode_picture_header(MpegEncContext *s);