From ba6802debf8167f8b9259e83f820dfb53c15a227 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Wed, 27 Mar 2002 21:25:22 +0000 Subject: [PATCH] 4MV motion estimation (not finished yet) SAD functions rewritten (8x8 support & MMX2 optimizations) HQ inter/intra decission msmpeg4 encoding bugfix (MV where too long) Originally committed as revision 362 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/avcodec.h | 5 + libavcodec/dsputil.c | 207 +++++++++---- libavcodec/dsputil.h | 14 +- libavcodec/h263.c | 14 +- libavcodec/i386/dsputil_mmx.c | 53 +++- libavcodec/i386/motion_est_mmx.c | 514 +++++++++++++++++++------------ libavcodec/motion_est.c | 340 +++++++++++++++++--- libavcodec/mpegvideo.c | 406 ++++++++++++++---------- libavcodec/mpegvideo.h | 24 +- 9 files changed, 1078 insertions(+), 499 deletions(-) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 0bca2b4fda..df6a7cfab7 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -61,9 +61,14 @@ extern int motion_estimation_method; #define ME_X1 5 /* encoding support */ +/* note not everything is supported yet */ #define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */ #define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */ +#define CODEC_FLAG_4MV 0x0004 /* 4 MV per MB allowed */ +#define CODEC_FLAG_B 0x0008 /* use B frames */ +#define CODEC_FLAG_QPEL 0x0010 /* use qpel MC */ +#define CODEC_FLAG_GMC 0x0020 /* use GMC */ /* codec capabilities */ diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 0e698f35ce..d27687d84a 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -36,6 +36,11 @@ op_pixels_abs_func pix_abs16x16_x2; op_pixels_abs_func pix_abs16x16_y2; op_pixels_abs_func pix_abs16x16_xy2; +op_pixels_abs_func pix_abs8x8; +op_pixels_abs_func pix_abs8x8_x2; +op_pixels_abs_func pix_abs8x8_y2; +op_pixels_abs_func pix_abs8x8_xy2; + UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; @@ -377,14 +382,14 @@ static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int i; for(i=0; i>8)]; - dst[1]= cm[(((src[1]+src[2])*160 - (src[0]+src[3])*48 + (src[0]+src[4])*24 - (src[1]+src[5])*8 + r)>>8)]; - dst[2]= cm[(((src[2]+src[3])*160 - (src[1]+src[4])*48 + (src[0]+src[5])*24 - (src[0]+src[6])*8 + r)>>8)]; - dst[3]= cm[(((src[3]+src[4])*160 - (src[2]+src[5])*48 + (src[1]+src[6])*24 - (src[0]+src[7])*8 + r)>>8)]; - dst[4]= cm[(((src[4]+src[5])*160 - (src[3]+src[6])*48 + (src[2]+src[7])*24 - (src[1]+src[8])*8 + r)>>8)]; - dst[5]= cm[(((src[5]+src[6])*160 - (src[4]+src[7])*48 + (src[3]+src[8])*24 - (src[2]+src[8])*8 + r)>>8)]; - dst[6]= cm[(((src[6]+src[7])*160 - (src[5]+src[8])*48 + (src[4]+src[8])*24 - (src[3]+src[7])*8 + r)>>8)]; - dst[7]= cm[(((src[7]+src[8])*160 - (src[6]+src[8])*48 + (src[5]+src[7])*24 - (src[4]+src[6])*8 + r)>>8)]; + dst[0]= cm[(((src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]) + r)>>5)]; + dst[1]= cm[(((src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]) + r)>>5)]; + dst[2]= cm[(((src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]) + r)>>5)]; + dst[3]= cm[(((src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]) + r)>>5)]; + dst[4]= cm[(((src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]) + r)>>5)]; + dst[5]= cm[(((src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]) + r)>>5)]; + dst[6]= cm[(((src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]) + r)>>5)]; + dst[7]= cm[(((src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]) + r)>>5)]; dst+=dstStride; src+=srcStride; } @@ -405,14 +410,14 @@ static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, const int src6= src[6*srcStride]; const int src7= src[7*srcStride]; const int src8= src[8*srcStride]; - dst[0*dstStride]= cm[(((src0+src1)*160 - (src0+src2)*48 + (src1+src3)*24 - (src2+src4)*8 + r)>>8)]; - dst[1*dstStride]= cm[(((src1+src2)*160 - (src0+src3)*48 + (src0+src4)*24 - (src1+src5)*8 + r)>>8)]; - dst[2*dstStride]= cm[(((src2+src3)*160 - (src1+src4)*48 + (src0+src5)*24 - (src0+src6)*8 + r)>>8)]; - dst[3*dstStride]= cm[(((src3+src4)*160 - (src2+src5)*48 + (src1+src6)*24 - (src0+src7)*8 + r)>>8)]; - dst[4*dstStride]= cm[(((src4+src5)*160 - (src3+src6)*48 + (src2+src7)*24 - (src1+src8)*8 + r)>>8)]; - dst[5*dstStride]= cm[(((src5+src6)*160 - (src4+src7)*48 + (src3+src8)*24 - (src2+src8)*8 + r)>>8)]; - dst[6*dstStride]= cm[(((src6+src7)*160 - (src5+src8)*48 + (src4+src8)*24 - (src3+src7)*8 + r)>>8)]; - dst[7*dstStride]= cm[(((src7+src8)*160 - (src6+src8)*48 + (src5+src7)*24 - (src4+src6)*8 + r)>>8)]; + dst[0*dstStride]= cm[(((src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4) + r)>>5)]; + dst[1*dstStride]= cm[(((src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5) + r)>>5)]; + dst[2*dstStride]= cm[(((src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6) + r)>>5)]; + dst[3*dstStride]= cm[(((src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7) + r)>>5)]; + dst[4*dstStride]= cm[(((src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8) + r)>>5)]; + dst[5*dstStride]= cm[(((src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8) + r)>>5)]; + dst[6*dstStride]= cm[(((src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7) + r)>>5)]; + dst[7*dstStride]= cm[(((src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6) + r)>>5)]; dst++; src++; } @@ -485,38 +490,38 @@ static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ - qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ + qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ }\ \ static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src, half, dstStride, srcStride, 1-r);\ }\ \ static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ - qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\ + qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\ }\ \ static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 half[64];\ - qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\ + qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\ avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\ }\ static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -524,9 +529,9 @@ static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -534,9 +539,9 @@ static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -544,9 +549,9 @@ static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -554,25 +559,25 @@ static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\ }\ static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -580,9 +585,9 @@ static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ @@ -590,16 +595,16 @@ static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcS UINT8 halfH[72];\ UINT8 halfV[64];\ UINT8 halfHV[64];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 128-r);\ - qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\ + qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\ avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\ }\ static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\ {\ UINT8 halfH[72];\ - qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\ - qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\ + qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\ + qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\ }\ qpel_mc_func qpel_mc ## name ## _tab[16]={ \ qpel_mc00_c ## name, \ @@ -623,12 +628,12 @@ qpel_mc_func qpel_mc ## name ## _tab[16]={ \ QPEL_MC(0, _rnd) QPEL_MC(1, _no_rnd) -int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; s = 0; - for(i=0;ih263_pred) { - for (i = 0; i < 6; i++) { -// mpeg4_encode_block(s, block[i], i); - } - } else { - for (i = 0; i < 6; i++) { - h263_encode_block(s, block[i], i); - } + for (i = 0; i < 6; i++) { + h263_encode_block(s, block[i], i); } } @@ -778,8 +772,8 @@ void h263_encode_init(MpegEncContext *s) s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p // use fcodes >1 only for mpeg4 & h263 & h263p FIXME - if(s->h263_plus) s->fcode_tab= umv_fcode_tab; - else if(s->h263_pred) s->fcode_tab= fcode_tab; + if(s->h263_plus) s->fcode_tab= umv_fcode_tab; + else if(s->h263_pred && !s->h263_msmpeg4) s->fcode_tab= fcode_tab; } static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 6b35d47534..09a7174126 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -24,19 +24,34 @@ int mm_flags; /* multimedia extension flags */ -int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); -int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs16x16_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs16x16_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs8x8_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx); + +int pix_abs8x8_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); + /* external functions, from idct_mmx.c */ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); /* pixel operations */ -static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; -static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; +static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001LL; +static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002LL; //static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; //static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; @@ -1035,10 +1050,14 @@ void dsputil_init_mmx(void) put_pixels_clamped = put_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx; - pix_abs16x16 = pix_abs16x16_mmx; - pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - pix_abs16x16_y2 = pix_abs16x16_y2_mmx; + pix_abs16x16 = pix_abs16x16_mmx; + pix_abs16x16_x2 = pix_abs16x16_x2_mmx; + pix_abs16x16_y2 = pix_abs16x16_y2_mmx; pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; + pix_abs8x8 = pix_abs8x8_mmx; + pix_abs8x8_x2 = pix_abs8x8_x2_mmx; + pix_abs8x8_y2 = pix_abs8x8_y2_mmx; + pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; av_fdct = fdct_mmx; put_pixels_tab[0] = put_pixels_mmx; @@ -1067,10 +1086,16 @@ void dsputil_init_mmx(void) sub_pixels_tab[3] = sub_pixels_xy2_mmx; if (mm_flags & MM_MMXEXT) { - pix_abs16x16 = pix_abs16x16_sse; - } - - if (mm_flags & MM_SSE) { + pix_abs16x16 = pix_abs16x16_mmx2; + pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; + pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; + pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2; + + pix_abs8x8 = pix_abs8x8_mmx2; + pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; + pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; + pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2; + put_pixels_tab[1] = put_pixels_x2_sse; put_pixels_tab[2] = put_pixels_y2_sse; diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c index 35b16b711c..e704c42194 100644 --- a/libavcodec/i386/motion_est_mmx.c +++ b/libavcodec/i386/motion_est_mmx.c @@ -16,229 +16,347 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * + * mostly by Michael Niedermayer */ #include "../dsputil.h" -#include "mmx.h" -static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001; -static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002; +static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={ +0x0000000000000000, +0x0001000100010001, +0x0002000200020002, +}; -/* mm7 is accumulator, mm6 is zero */ -static inline void sad_add(const UINT8 *p1, const UINT8 *p2) +static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h) { - movq_m2r(*p1, mm0); - movq_m2r(*p2, mm1); - movq_r2r(mm0, mm2); - psubusb_r2r(mm1, mm0); - psubusb_r2r(mm2, mm1); - por_r2r(mm1, mm0); /* mm0 is absolute value */ - - movq_r2r(mm0, mm1); - punpcklbw_r2r(mm6, mm0); - punpckhbw_r2r(mm6, mm1); - paddusw_r2r(mm0, mm7); - paddusw_r2r(mm1, mm7); + int len= -(stride<>= 1; - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - pxor_r2r(mm6, mm6); /* mm7 is zero constant */ - do { - sad_add(p1, p2); - sad_add(p1 + 8, p2 + 8); - p1 += lx; - p2 += lx; - sad_add(p1, p2); - sad_add(p1 + 8, p2 + 8); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int len= -(stride<>= 1; - p1 = blk1; - p2 = blk2; - pxor_r2r(mm7, mm7); /* mm7 is accumulator */ - do { - sad_add_sse(p1, p2); - p1 += lx; - p2 += lx; - sad_add_sse(p1, p2); - p1 += lx; - p2 += lx; - } while (--h); - return sad_end(); + int len= -(stride<0 ? (a) : -(a)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) #define INTER_BIAS 257 static void halfpel_motion_search(MpegEncContext * s, @@ -164,7 +165,7 @@ static int full_motion_search(MpegEncContext * s, for (y = y1; y <= y2; y++) { for (x = x1; x <= x2; x++) { d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, - s->linesize, 16); + s->linesize); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < @@ -228,7 +229,7 @@ static int log_motion_search(MpegEncContext * s, do { for (y = y1; y <= y2; y += range) { for (x = x1; x <= x2; x += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dmin = d; mx = x; @@ -308,7 +309,7 @@ static int phods_motion_search(MpegEncContext * s, lastx = x; for (x = x1; x <= x2; x += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminx = d; mx = x; @@ -317,7 +318,7 @@ static int phods_motion_search(MpegEncContext * s, x = lastx; for (y = y1; y <= y2; y += range) { - d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize); if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { dminy = d; my = y; @@ -361,7 +362,7 @@ static int phods_motion_search(MpegEncContext * s, #define CHECK_MV(x,y)\ {\ - d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride, 16);\ + d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ d += (mv_penalty[((x)<mb_x, s->mb_y);\ if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\ @@ -440,6 +465,32 @@ static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, */ } +static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin, + UINT8 *new_pic, UINT8 *old_pic, int pic_stride, + int pred_x, int pred_y, UINT16 *mv_penalty, int quant, + int xmin, int ymin, int xmax, int ymax, int shift) +{ + int next_dir=-1; + + for(;;){ + int d; + const int dir= next_dir; + const int x= best[0]; + const int y= best[1]; + next_dir=-1; + +//printf("%d", dir); + if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0) + if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1) + if(dir!=0 && x=xmin && x<=xmax && y>=ymin && y<=ymax){ - d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride, 16); + d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride); d += (mv_penalty[((x)<new_picture[0] + pic_xy; old_pic = s->last_picture[0] + pic_xy; - dmin = pix_abs16x16(new_pic, old_pic, pic_stride, 16); + dmin = pix_abs16x16(new_pic, old_pic, pic_stride); if(dminlinesize; + const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8; + UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + int quant= s->qscale; // qscale of the prev frame + const int shift= 1+s->quarter_sample; + + new_pic = s->new_picture[0] + pic_xy; + old_pic = s->last_picture[0] + pic_xy; + + dmin = pix_abs8x8(new_pic, old_pic, pic_stride); + + /* first line */ + if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) { + CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) + }else{ + CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift) + if(dmin>shift; + *my_ptr= P[4][1]>>shift; +//printf("M\n"); + return dmin; + } + CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) + CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift) + CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift) + } + CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift) + CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift) + +//check(best[0],best[1],0, b0) + dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); +//check(best[0],best[1],0, b1) + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + #define CHECK_HALF_MV(suffix, x, y) \ - d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize, 16);\ + d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ if(d>1), s->linesize);\ + d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ + if(dmv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + const int quant= s->qscale; + int pen_x, pen_y; + int mx, my, mx1, my1, d, xx, yy, dminh; + UINT8 *pix, *ptr; + + xx = 8 * block_x; + yy = 8 * block_y; + pix = s->new_picture[0] + (yy * s->linesize) + xx; + + mx = *mx_ptr; + my = *my_ptr; + ptr = s->last_picture[0] + ((yy+my) * s->linesize) + xx + mx; + + dminh = dmin; + + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { + + mx= mx1= 2*mx; + my= my1= 2*my; + if(dmin < Z_THRESHOLD && mx==0 && my==0){ + *mx_ptr = 0; + *my_ptr = 0; + return; + } + + pen_x= pred_x + mx; + pen_y= pred_y + my; + + ptr-= s->linesize; + CHECK_HALF_MV4(xy2, -1, -1) + CHECK_HALF_MV4(y2 , 0, -1) + CHECK_HALF_MV4(xy2, +1, -1) + + ptr+= s->linesize; + CHECK_HALF_MV4(x2 , -1, 0) + CHECK_HALF_MV4(x2 , +1, 0) + CHECK_HALF_MV4(xy2, -1, +1) + CHECK_HALF_MV4(y2 , 0, +1) + CHECK_HALF_MV4(xy2, +1, +1) + + }else{ + mx*=2; + my*=2; + } + + *mx_ptr = mx; + *my_ptr = my; +} + +static inline void set_mv_tables(MpegEncContext * s, int mx, int my) +{ + const int xy= s->mb_x + s->mb_y*s->mb_width; + + s->mv_table[0][xy] = mx; + s->mv_table[1][xy] = my; + + /* has allready been set to the 4 MV if 4MV is done */ + if(!(s->flags&CODEC_FLAG_4MV)){ + int mot_xy= s->block_index[0]; + + s->motion_val[mot_xy ][0]= mx; + s->motion_val[mot_xy ][1]= my; + s->motion_val[mot_xy+1][0]= mx; + s->motion_val[mot_xy+1][1]= my; + + mot_xy += s->block_wrap[0]; + s->motion_val[mot_xy ][0]= mx; + s->motion_val[mot_xy ][1]= my; + s->motion_val[mot_xy+1][0]= mx; + s->motion_val[mot_xy+1][1]= my; + } +} + #ifndef CONFIG_TEST_MV_ENCODE -int estimate_motion(MpegEncContext * s, - int mb_x, int mb_y, - int *mx_ptr, int *my_ptr) +void estimate_motion(MpegEncContext * s, + int mb_x, int mb_y) { UINT8 *pix, *ppix; int sum, varc, vard, mx, my, range, dmin, xx, yy; int xmin, ymin, xmax, ymax; int rel_xmin, rel_ymin, rel_xmax, rel_ymax; int pred_x=0, pred_y=0; - int P[5][2]; + int P[6][2]; const int shift= 1+s->quarter_sample; + int mb_type=0; range = 8 * (1 << (s->f_code - 1)); /* XXX: temporary kludge to avoid overflow for msmpeg4 */ @@ -680,14 +869,13 @@ int estimate_motion(MpegEncContext * s, case ME_X1: case ME_EPZS: { - static const int off[4]= {2, 1, 1, -1}; - const int mot_stride = s->mb_width*2 + 2; - const int mot_xy = (s->mb_y*2 + 1)*mot_stride + s->mb_x*2 + 1; + const int mot_stride = s->block_wrap[0]; + const int mot_xy = s->block_index[0]; - rel_xmin= xmin - s->mb_x*16; - rel_xmax= xmax - s->mb_x*16; - rel_ymin= ymin - s->mb_y*16; - rel_ymax= ymax - s->mb_y*16; + rel_xmin= xmin - mb_x*16; + rel_xmax= xmax - mb_x*16; + rel_ymin= ymin - mb_y*16; + rel_ymax= ymax - mb_y*16; P[0][0] = s->motion_val[mot_xy ][0]; P[0][1] = s->motion_val[mot_xy ][1]; @@ -696,14 +884,14 @@ int estimate_motion(MpegEncContext * s, if(P[1][0] > (rel_xmax<mb_y == 0 || s->first_slice_line || s->first_gob_line)) { + if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) { P[4][0] = P[1][0]; P[4][1] = P[1][1]; } else { P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; - P[3][0] = s->motion_val[mot_xy - mot_stride + off[0] ][0]; - P[3][1] = s->motion_val[mot_xy - mot_stride + off[0] ][1]; + P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0]; + P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1]; if(P[2][1] > (rel_ymax< (rel_ymax<mb_x*16; - my+= s->mb_y*16; + mx+= mb_x*16; + my+= mb_y*16; break; } + + if(s->flags&CODEC_FLAG_4MV){ + int block; + + mb_type|= MB_TYPE_INTER4V; + + for(block=0; block<4; block++){ + int mx4, my4; + int pred_x4, pred_y4; + int dmin4; + static const int off[4]= {2, 1, 1, -1}; + const int mot_stride = s->block_wrap[0]; + const int mot_xy = s->block_index[block]; + const int block_x= mb_x*2 + (block&1); + const int block_y= mb_y*2 + (block>>1); + + const int rel_xmin4= xmin - block_x*8; + const int rel_xmax4= xmax - block_x*8; + const int rel_ymin4= ymin - block_y*8; + const int rel_ymax4= ymax - block_y*8; + + P[0][0] = s->motion_val[mot_xy ][0]; + P[0][1] = s->motion_val[mot_xy ][1]; + P[1][0] = s->motion_val[mot_xy - 1][0]; + P[1][1] = s->motion_val[mot_xy - 1][1]; + if(P[1][0] > (rel_xmax4<first_slice_line || s->first_gob_line) && block<2) { + P[4][0] = P[1][0]; + P[4][1] = P[1][1]; + } else { + P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; + P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; + P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0]; + P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1]; + if(P[2][1] > (rel_ymax4< (rel_ymax4<out_format == FMT_H263){ + pred_x4 = P[4][0]; + pred_y4 = P[4][1]; + }else { /* mpeg1 at least */ + pred_x4= P[1][0]; + pred_y4= P[1][1]; + } + P[5][0]= mx - mb_x*16; + P[5][1]= my - mb_y*16; + + dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4); + + halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, + pred_x4, pred_y4, block_x, block_y); + + s->motion_val[ s->block_index[block] ][0]= mx4; + s->motion_val[ s->block_index[block] ][1]= my4; + } + } /* intra / predictive decision */ xx = mb_x * 16; @@ -737,7 +987,7 @@ int estimate_motion(MpegEncContext * s, sum = pix_sum(pix, s->linesize); #if 0 varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS; - vard = pix_abs16x16(pix, ppix, s->linesize, 16); + vard = pix_abs16x16(pix, ppix, s->linesize); #else sum= (sum+8)>>4; varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8); @@ -745,30 +995,38 @@ int estimate_motion(MpegEncContext * s, #endif s->mb_var[s->mb_width * mb_y + mb_x] = varc; - s->avg_mb_var += varc; + s->avg_mb_var+= varc; s->mc_mb_var += vard; #if 0 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", varc, s->avg_mb_var, sum, vard, mx - xx, my - yy); #endif - if (vard <= 64 || vard < varc) { - if (s->full_search != ME_ZERO) { + if(s->flags&CODEC_FLAG_HQ){ + if (vard*2 + 200 > varc) + mb_type|= MB_TYPE_INTRA; + if (varc*2 + 200 > vard){ + mb_type|= MB_TYPE_INTER; halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y); - } else { - mx -= 16 * s->mb_x; - my -= 16 * s->mb_y; } -// check(mx + 32*s->mb_x, my + 32*s->mb_y, 1, end) - - *mx_ptr = mx; - *my_ptr = my; - return 0; - } else { - *mx_ptr = 0; - *my_ptr = 0; - return 1; + }else{ + if (vard <= 64 || vard < varc) { + mb_type|= MB_TYPE_INTER; + if (s->full_search != ME_ZERO) { + halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y); + } else { + mx -= 16 * mb_x; + my -= 16 * mb_y; + } + }else{ + mb_type|= MB_TYPE_INTRA; + mx = 0;//mx*2 - 32 * mb_x; + my = 0;//my*2 - 32 * mb_y; + } } + + s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; + set_mv_tables(s, mx, my); } #else diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index a4d649cfe5..c06f51e8af 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -227,6 +227,8 @@ int MPV_common_init(MpegEncContext *s) if (!s->mbskip_table) goto fail; } + + s->block= s->intra_block; s->context_initialized = 1; return 0; @@ -295,7 +297,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->qblur= avctx->qblur; s->avctx = avctx; s->aspect_ratio_info= avctx->aspect_ratio_info; - s->hq= (avctx->flags & CODEC_FLAG_HQ); + s->flags= avctx->flags; if (s->gop_size <= 1) { s->intra_only = 1; @@ -1078,68 +1080,183 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) } } the_end: - emms_c(); + emms_c(); //FIXME remove +} + +static void encode_mb(MpegEncContext *s) +{ + int wrap; + const int mb_x= s->mb_x; + const int mb_y= s->mb_y; + UINT8 *ptr; + const int motion_x= s->mv[0][0][0]; + const int motion_y= s->mv[0][0][1]; + int i; + + /* get the pixels */ + wrap = s->linesize; + ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; + get_pixels(s->block[0], ptr, wrap); + get_pixels(s->block[1], ptr + 8, wrap); + get_pixels(s->block[2], ptr + 8 * wrap, wrap); + get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); + wrap = s->linesize >> 1; + ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(s->block[4], ptr, wrap); + + wrap = s->linesize >> 1; + ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(s->block[5], ptr, wrap); + + /* subtract previous frame if non intra */ + if (!s->mb_intra) { + int dxy, offset, mx, my; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + ptr = s->last_picture[0] + + ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + + (mb_x * 16 + (motion_x >> 1)); + + sub_pixels_2(s->block[0], ptr, s->linesize, dxy); + sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy); + sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy); + sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); + + if (s->out_format == FMT_H263) { + /* special rounding for h263 */ + dxy = 0; + if ((motion_x & 3) != 0) + dxy |= 1; + if ((motion_y & 3) != 0) + dxy |= 2; + mx = motion_x >> 2; + my = motion_y >> 2; + } else { + mx = motion_x / 2; + my = motion_y / 2; + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + } + offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); + ptr = s->last_picture[1] + offset; + sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); + ptr = s->last_picture[2] + offset; + sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); + } + +#if 0 + { + float adap_parm; + + adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) / + ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0); + + printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", + (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', + s->qscale, adap_parm, s->qscale*adap_parm, + s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var); + } +#endif + /* DCT & quantize */ + if (s->h263_msmpeg4) { + msmpeg4_dc_scale(s); + } else if (s->h263_pred) { + h263_dc_scale(s); + } else { + /* default quantization values */ + s->y_dc_scale = 8; + s->c_dc_scale = 8; + } + for(i=0;i<6;i++) { + s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); + } + + /* huffman encode */ + switch(s->out_format) { + case FMT_MPEG1: + mpeg1_encode_mb(s, s->block, motion_x, motion_y); + break; + case FMT_H263: + if (s->h263_msmpeg4) + msmpeg4_encode_mb(s, s->block, motion_x, motion_y); + else if(s->h263_pred) + mpeg4_encode_mb(s, s->block, motion_x, motion_y); + else + h263_encode_mb(s, s->block, motion_x, motion_y); + break; + case FMT_MJPEG: + mjpeg_encode_mb(s, s->block); + break; + } +} + +static void copy_bits(PutBitContext *pb, UINT8 *src, int length) +{ + int bytes= length>>3; + int bits= length&7; + int i; + + for(i=0; i>(8-bits)); } static void encode_picture(MpegEncContext *s, int picture_number) { - int mb_x, mb_y, wrap, last_gob, pdif = 0; - UINT8 *ptr; - int i, motion_x, motion_y; + int mb_x, mb_y, last_gob, pdif = 0; + int i; int bits; + MpegEncContext best_s; + UINT8 bit_buf[4][3000]; //FIXME check that this is ALLWAYS large enogh for a MB s->picture_number = picture_number; + s->block_wrap[0]= + s->block_wrap[1]= + s->block_wrap[2]= + s->block_wrap[3]= s->mb_width*2 + 2; + s->block_wrap[4]= + s->block_wrap[5]= s->mb_width + 2; + s->last_mc_mb_var = s->mc_mb_var; /* Reset the average MB variance */ s->avg_mb_var = 0; s->mc_mb_var = 0; /* Estimate motion for every MB */ - for(mb_y=0; mb_y < s->mb_height; mb_y++) { - for(mb_x=0; mb_x < s->mb_width; mb_x++) { - int xy= mb_y * s->mb_width + mb_x; - const int mot_stride = s->mb_width*2 + 2; - int mot_xy = (mb_y*2 + 1)*mot_stride + mb_x*2 + 1; - s->mb_x = mb_x; - s->mb_y = mb_y; + if(s->pict_type == P_TYPE){ + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1; + s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1); + s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1; + s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2); + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + s->mb_x = mb_x; + s->mb_y = mb_y; + s->block_index[0]+=2; + s->block_index[1]+=2; + s->block_index[2]+=2; + s->block_index[3]+=2; - /* compute motion vector and macro block type (intra or non intra) */ - motion_x = 0; - motion_y = 0; - if (s->pict_type == P_TYPE) { - s->mb_intra = estimate_motion(s, mb_x, mb_y, - &motion_x, - &motion_y); - } else { - s->mb_intra = 1; + /* compute motion vector & mb_type and store in context */ + estimate_motion(s, mb_x, mb_y); +// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER; } - /* Store MB type and MV */ - s->mb_type[xy] = s->mb_intra; - s->mv_table[0][xy] = motion_x; - s->mv_table[1][xy] = motion_y; - - s->motion_val[mot_xy ][0]= motion_x; - s->motion_val[mot_xy ][1]= motion_y; - s->motion_val[mot_xy+1][0]= motion_x; - s->motion_val[mot_xy+1][1]= motion_y; - mot_xy += mot_stride; - s->motion_val[mot_xy ][0]= motion_x; - s->motion_val[mot_xy ][1]= motion_y; - s->motion_val[mot_xy+1][0]= motion_x; - s->motion_val[mot_xy+1][1]= motion_y; } + emms_c(); + }else{ + /* I-Frame */ + //FIXME do we need to zero them? + memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2); + memset(s->mv_table[0] , 0, sizeof(INT16)*s->mb_width*s->mb_height); + memset(s->mv_table[1] , 0, sizeof(INT16)*s->mb_width*s->mb_height); + memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height); } - emms_c(); if(s->avg_mb_var < s->mc_mb_var && s->pict_type != B_TYPE){ //FIXME subtract MV bits - int i; s->pict_type= I_TYPE; s->picture_in_gop_number=0; - for(i=0; imb_num; i++){ - s->mb_type[i] = 1; - s->mv_table[0][i] = 0; - s->mv_table[1][i] = 0; - } + memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height); +//printf("Scene change detected, encoding as I Frame\n"); } /* find best f_code for ME which do unlimited searches */ @@ -1152,7 +1269,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) for(i=0; i<8; i++) mv_num[i]=0; for(i=0; imb_num; i++){ - if(s->mb_type[i] == 0){ + if(s->mb_type[i] & (MB_TYPE_INTER|MB_TYPE_INTER4V)){ mv_num[ fcode_tab[s->mv_table[0][i] + MAX_MV] ]++; mv_num[ fcode_tab[s->mv_table[1][i] + MAX_MV] ]++; //printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i); @@ -1181,16 +1298,20 @@ static void encode_picture(MpegEncContext *s, int picture_number) UINT8 * fcode_tab= s->fcode_tab; for(i=0; imb_num; i++){ - if(s->mb_type[i] == 0){ + if(s->mb_type[i]&MB_TYPE_INTER){ if( fcode_tab[s->mv_table[0][i] + MAX_MV] > f_code || fcode_tab[s->mv_table[0][i] + MAX_MV] == 0 || fcode_tab[s->mv_table[1][i] + MAX_MV] > f_code || fcode_tab[s->mv_table[1][i] + MAX_MV] == 0 ){ - s->mb_type[i] = 1; + s->mb_type[i] &= ~MB_TYPE_INTER; + s->mb_type[i] |= MB_TYPE_INTRA; s->mv_table[0][i] = 0; s->mv_table[1][i] = 0; } } + if(s->mb_type[i]&MB_TYPE_INTER4V){ + //FIXME + } } } @@ -1249,8 +1370,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_incr = 1; s->last_mv[0][0][0] = 0; s->last_mv[0][0][1] = 0; - s->mv_type = MV_TYPE_16X16; - s->mv_dir = MV_DIR_FORWARD; /* Get the GOB height based on picture height */ if (s->out_format == FMT_H263 && !s->h263_pred && !s->h263_msmpeg4) { @@ -1264,12 +1383,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->avg_mb_var = s->avg_mb_var / s->mb_num; - s->block_wrap[0]= - s->block_wrap[1]= - s->block_wrap[2]= - s->block_wrap[3]= s->mb_width*2 + 2; - s->block_wrap[4]= - s->block_wrap[5]= s->mb_width + 2; for(mb_y=0; mb_y < s->mb_height; mb_y++) { /* Put GOB header based on RTP MTU */ /* TODO: Put all this stuff in a separate generic function */ @@ -1292,6 +1405,11 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); for(mb_x=0; mb_x < s->mb_width; mb_x++) { + const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; + PutBitContext pb; + int d; + int dmin=10000000; + int best=0; s->mb_x = mb_x; s->mb_y = mb_y; @@ -1301,124 +1419,78 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->block_index[3]+=2; s->block_index[4]++; s->block_index[5]++; -#if 0 - /* compute motion vector and macro block type (intra or non intra) */ - motion_x = 0; - motion_y = 0; - if (s->pict_type == P_TYPE) { - s->mb_intra = estimate_motion(s, mb_x, mb_y, - &motion_x, - &motion_y); - } else { - s->mb_intra = 1; - } -#endif - s->mb_intra = s->mb_type[mb_y * s->mb_width + mb_x]; - motion_x = s->mv_table[0][mb_y * s->mb_width + mb_x]; - motion_y = s->mv_table[1][mb_y * s->mb_width + mb_x]; - - /* get the pixels */ - wrap = s->linesize; - ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; - get_pixels(s->block[0], ptr, wrap); - get_pixels(s->block[1], ptr + 8, wrap); - get_pixels(s->block[2], ptr + 8 * wrap, wrap); - get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap); - wrap = s->linesize >> 1; - ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(s->block[4], ptr, wrap); + s->mv_type = MV_TYPE_16X16; + s->mv_dir = MV_DIR_FORWARD; + if(mb_type & (mb_type-1)){ // more than 1 MB type possible + pb= s->pb; + if(mb_type&MB_TYPE_INTER){ + s->mb_intra= 0; + s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x]; + s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x]; + init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL); + s->block= s->inter_block; - wrap = s->linesize >> 1; - ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; - get_pixels(s->block[5], ptr, wrap); - - /* subtract previous frame if non intra */ - if (!s->mb_intra) { - int dxy, offset, mx, my; - - dxy = ((motion_y & 1) << 1) | (motion_x & 1); - ptr = s->last_picture[0] + - ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + - (mb_x * 16 + (motion_x >> 1)); - - sub_pixels_2(s->block[0], ptr, s->linesize, dxy); - sub_pixels_2(s->block[1], ptr + 8, s->linesize, dxy); - sub_pixels_2(s->block[2], ptr + s->linesize * 8, s->linesize, dxy); - sub_pixels_2(s->block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); - - if (s->out_format == FMT_H263) { - /* special rounding for h263 */ - dxy = 0; - if ((motion_x & 3) != 0) - dxy |= 1; - if ((motion_y & 3) != 0) - dxy |= 2; - mx = motion_x >> 2; - my = motion_y >> 2; - } else { - mx = motion_x / 2; - my = motion_y / 2; - dxy = ((my & 1) << 1) | (mx & 1); - mx >>= 1; - my >>= 1; + encode_mb(s); + d= get_bit_count(&s->pb); + if(dpb); + dmin=d; + best_s.mv[0][0][0]= s->mv[0][0][0]; + best_s.mv[0][0][1]= s->mv[0][0][1]; + best_s.mb_intra= 0; + best_s.pb=s->pb; + best_s.block= s->block; + best=1; + for(i=0; i<6; i++) + best_s.block_last_index[i]= s->block_last_index[i]; + } } - offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); - ptr = s->last_picture[1] + offset; - sub_pixels_2(s->block[4], ptr, s->linesize >> 1, dxy); - ptr = s->last_picture[2] + offset; - sub_pixels_2(s->block[5], ptr, s->linesize >> 1, dxy); - } - emms_c(); - -#if 0 - { - float adap_parm; - - adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) / - ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0); - - printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", - (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', - s->qscale, adap_parm, s->qscale*adap_parm, - s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var); - } -#endif - /* DCT & quantize */ - if (s->h263_msmpeg4) { - msmpeg4_dc_scale(s); - } else if (s->h263_pred) { - h263_dc_scale(s); + if(mb_type&MB_TYPE_INTRA){ + s->mb_intra= 1; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL); + s->block= s->intra_block; + + encode_mb(s); + d= get_bit_count(&s->pb); + if(dpb); + dmin=d; + best_s.mv[0][0][0]= 0; + best_s.mv[0][0][1]= 0; + best_s.mb_intra= 1; + best_s.pb=s->pb; + best_s.block= s->block; + for(i=0; i<6; i++) + best_s.block_last_index[i]= s->block_last_index[i]; + best=0; + } + /* force cleaning of ac/dc if needed ... */ + s->mbintra_table[mb_x + mb_y*s->mb_width]=1; + } + s->mv[0][0][0]= best_s.mv[0][0][0]; + s->mv[0][0][1]= best_s.mv[0][0][1]; + s->mb_intra= best_s.mb_intra; + for(i=0; i<6; i++) + s->block_last_index[i]= best_s.block_last_index[i]; + copy_bits(&pb, bit_buf[best], dmin); + s->block= best_s.block; + s->pb= pb; } else { - /* default quantization values */ - s->y_dc_scale = 8; - s->c_dc_scale = 8; + // only one MB-Type possible + if(mb_type&MB_TYPE_INTRA){ + s->mb_intra= 1; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + }else{ + s->mb_intra= 0; + s->mv[0][0][0] = s->mv_table[0][mb_y * s->mb_width + mb_x]; + s->mv[0][0][1] = s->mv_table[1][mb_y * s->mb_width + mb_x]; + } + encode_mb(s); } - for(i=0;i<6;i++) { - s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale); - } - - /* huffman encode */ - switch(s->out_format) { - case FMT_MPEG1: - mpeg1_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_H263: - if (s->h263_msmpeg4) - msmpeg4_encode_mb(s, s->block, motion_x, motion_y); - else if(s->h263_pred) - mpeg4_encode_mb(s, s->block, motion_x, motion_y); - else - h263_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_MJPEG: - mjpeg_encode_mb(s, s->block); - break; - } - - /* decompress blocks so that we keep the state of the decoder */ - s->mv[0][0][0] = motion_x; - s->mv[0][0][1] = motion_y; MPV_decode_mb(s, s->block); } @@ -1437,6 +1509,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->first_gob_line = 0; } } + emms_c(); if (s->h263_msmpeg4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); @@ -1454,7 +1527,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->ptr_lastgob = pbBufPtr(&s->pb); //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif); } - } static int dct_quantize_c(MpegEncContext *s, diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 49c36bec21..03e9eaf550 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -65,7 +65,7 @@ typedef struct MpegEncContext { int qmax; /* max qscale */ int max_qdiff; /* max qscale difference between frames */ int encoding; /* true if we are encoding (vs decoding) */ - int hq; /* set if CODEC_FLAG_HQ is used in AVCodecContext.flags */ + int flags; /* AVCodecContext.flags (HQ, MV4, ...) */ /* the following fields are managed internally by the encoder */ /* bit output */ @@ -141,8 +141,16 @@ typedef struct MpegEncContext { int mb_x, mb_y; int mb_incr; int mb_intra; - INT16 *mb_var; /* Table for MB variances */ - char *mb_type; /* Table for MB type */ + UINT16 *mb_var; /* Table for MB variances */ + UINT8 *mb_type; /* Table for MB type */ +#define MB_TYPE_INTRA 0x01 +#define MB_TYPE_INTER 0x02 +#define MB_TYPE_INTER4V 0x04 +#define MB_TYPE_SKIPED 0x08 +#define MB_TYPE_DIRECT 0x10 +#define MB_TYPE_FORWARD 0x20 +#define MB_TYPE_BACKWAD 0x40 +#define MB_TYPE_BIDIR 0x80 int block_index[6]; int block_wrap[6]; @@ -295,7 +303,10 @@ typedef struct MpegEncContext { UINT8 *ptr_last_mb_line; UINT32 mb_line_avgsize; - DCTELEM block[6][64] __align8; + DCTELEM (*block)[64]; /* points to one of the following blocks */ + DCTELEM intra_block[6][64] __align8; + DCTELEM inter_block[6][64] __align8; + DCTELEM inter4v_block[6][64] __align8; void (*dct_unquantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale); } MpegEncContext; @@ -311,9 +322,8 @@ void MPV_common_init_mmx(MpegEncContext *s); /* motion_est.c */ -int estimate_motion(MpegEncContext *s, - int mb_x, int mb_y, - int *mx_ptr, int *my_ptr); +void estimate_motion(MpegEncContext *s, + int mb_x, int mb_y); /* mpeg12.c */ extern INT16 default_intra_matrix[64];