avcodec/mpegpicture: Move mb_var, mc_mb_var and mb_mean to MpegEncCtx

These tables are only used by encoders and only for the current picture;
ergo they need not be put into the picture at all, but rather into
the encoder's context. They also don't need to be refcounted,
because there is only one owner.

In contrast to this, the earlier code refcounts them, which
incurs unnecessary overhead. These references are not unreferenced
in ff_mpeg_unref_picture() (they are kept in order to have something
like a buffer pool), so that several buffers are kept at the same
time, although only one is needed, thereby wasting memory.

The code also propagates references to other pictures not part of
the pictures array (namely the copies of the current/next/last pictures
in the MpegEncContext, which get references of their own). These
references are not unreferenced in ff_mpeg_unref_picture() (the
buffers are probably kept in order to have something like a pool),
yet if the current picture is a B-frame, it gets unreferenced
at the end of ff_mpv_encode_picture() and its slot in the picture
array will therefore be reused the next time; but the copy of the
current picture also still has its references and therefore
these buffers will be duplicated in order to make them writable
in the next call to ff_mpv_encode_picture(). This is of course
unnecessary.

Finally, ff_find_unused_picture() is supposed to just return
any unused picture and the code is supposed to work with it;
yet for the vsynth*-mpeg4-adap tests the result depends upon
the content of these buffers; given that this patchset
changes the content of these buffers (the initial content is now
the state of these buffers after encoding the last frame;
before this patch the buffers used came from the last picture
that occupied the same slot in the picture array) their ref-files
needed to be changed. This points to a bug somewhere (if one removes
the initialization, one gets uninitialized reads in
adaptive_quantization in ratecontrol.c).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2022-08-07 09:02:51 +02:00
parent 109515e16d
commit bc109a53c7
12 changed files with 64 additions and 85 deletions

View File

@ -895,7 +895,6 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
int P[10][2];
const int shift= 1+s->quarter_sample;
int mb_type=0;
Picture * const pic= &s->current_picture;
init_ref(c, s->new_picture->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);
@ -917,8 +916,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
varc = s->mpvencdsp.pix_norm1(pix, s->linesize) -
(((unsigned) sum * sum) >> 8) + 500;
pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
s->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
s->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
c->mb_var_sum_temp += (varc+128)>>8;
if (s->motion_est != FF_ME_ZERO) {
@ -965,7 +964,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
vard = s->mecc.sse[0](NULL, pix, ppix, s->linesize, 16);
pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
s->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
c->mc_mb_var_sum_temp += (vard+128)>>8;
if (c->avctx->mb_decision > FF_MB_DECISION_SIMPLE) {
@ -1509,7 +1508,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
score= ((unsigned)(score*score + 128*256))>>16;
c->mc_mb_var_sum_temp += score;
s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
s->mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
s->mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_DIRECT0;
return;
@ -1574,7 +1573,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
score= ((unsigned)(score*score + 128*256))>>16;
c->mc_mb_var_sum_temp += score;
s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
s->mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
}
if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
@ -1629,7 +1628,8 @@ int ff_get_best_fcode(MpegEncContext * s, const int16_t (*mv_table)[2], int type
continue;
for(j=0; j<fcode && j<8; j++){
if(s->pict_type==AV_PICTURE_TYPE_B || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
if (s->pict_type == AV_PICTURE_TYPE_B ||
s->mc_mb_var[xy] < s->mb_var[xy])
score[j]-= 170;
}
}

View File

@ -37,9 +37,6 @@ static void av_noinline free_picture_tables(Picture *pic)
pic->alloc_mb_width =
pic->alloc_mb_height = 0;
av_buffer_unref(&pic->mb_var_buf);
av_buffer_unref(&pic->mc_mb_var_buf);
av_buffer_unref(&pic->mb_mean_buf);
av_buffer_unref(&pic->mbskip_table_buf);
av_buffer_unref(&pic->qscale_table_buf);
av_buffer_unref(&pic->mb_type_buf);
@ -60,9 +57,6 @@ do {\
return ret;\
} while (0)
MAKE_WRITABLE(mb_var_buf);
MAKE_WRITABLE(mc_mb_var_buf);
MAKE_WRITABLE(mb_mean_buf);
MAKE_WRITABLE(mbskip_table_buf);
MAKE_WRITABLE(qscale_table_buf);
MAKE_WRITABLE(mb_type_buf);
@ -218,14 +212,6 @@ static int alloc_picture_tables(AVCodecContext *avctx, Picture *pic, int encodin
if (!pic->mbskip_table_buf || !pic->qscale_table_buf || !pic->mb_type_buf)
return AVERROR(ENOMEM);
if (encoding) {
pic->mb_var_buf = av_buffer_allocz(mb_array_size * sizeof(int16_t));
pic->mc_mb_var_buf = av_buffer_allocz(mb_array_size * sizeof(int16_t));
pic->mb_mean_buf = av_buffer_allocz(mb_array_size);
if (!pic->mb_var_buf || !pic->mc_mb_var_buf || !pic->mb_mean_buf)
return AVERROR(ENOMEM);
}
if (out_format == FMT_H263 || encoding ||
(avctx->export_side_data & AV_CODEC_EXPORT_DATA_MVS)) {
int mv_size = 2 * (b8_array_size + 4) * sizeof(int16_t);
@ -285,12 +271,6 @@ int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
if (ret < 0)
goto fail;
if (encoding) {
pic->mb_var = (uint16_t*)pic->mb_var_buf->data;
pic->mc_mb_var = (uint16_t*)pic->mc_mb_var_buf->data;
pic->mb_mean = pic->mb_mean_buf->data;
}
pic->mbskip_table = pic->mbskip_table_buf->data;
pic->qscale_table = pic->qscale_table_buf->data + 2 * mb_stride + 1;
pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * mb_stride + 1;
@ -316,7 +296,7 @@ fail:
*/
void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *pic)
{
int off = offsetof(Picture, mb_mean) + sizeof(pic->mb_mean);
int off = offsetof(Picture, hwaccel_priv_buf) + sizeof(pic->hwaccel_priv_buf);
pic->tf.f = pic->f;
/* WM Image / Screen codecs allocate internal buffers with different
@ -340,10 +320,7 @@ int ff_update_picture_tables(Picture *dst, const Picture *src)
{
int i, ret;
ret = av_buffer_replace(&dst->mb_var_buf, src->mb_var_buf);
ret |= av_buffer_replace(&dst->mc_mb_var_buf, src->mc_mb_var_buf);
ret |= av_buffer_replace(&dst->mb_mean_buf, src->mb_mean_buf);
ret |= av_buffer_replace(&dst->mbskip_table_buf, src->mbskip_table_buf);
ret = av_buffer_replace(&dst->mbskip_table_buf, src->mbskip_table_buf);
ret |= av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
for (i = 0; i < 2; i++) {
@ -356,9 +333,6 @@ int ff_update_picture_tables(Picture *dst, const Picture *src)
return ret;
}
dst->mb_var = src->mb_var;
dst->mc_mb_var = src->mc_mb_var;
dst->mb_mean = src->mb_mean;
dst->mbskip_table = src->mbskip_table;
dst->qscale_table = src->qscale_table;
dst->mb_type = src->mb_type;

View File

@ -62,19 +62,10 @@ typedef struct Picture {
AVBufferRef *ref_index_buf[2];
int8_t *ref_index[2];
AVBufferRef *mb_var_buf;
uint16_t *mb_var; ///< Table for MB variances
AVBufferRef *mc_mb_var_buf;
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
int alloc_mb_width; ///< mb_width used to allocate tables
int alloc_mb_height; ///< mb_height used to allocate tables
int alloc_mb_stride; ///< mb_stride used to allocate tables
AVBufferRef *mb_mean_buf;
uint8_t *mb_mean; ///< Table for MB luminance
AVBufferRef *hwaccel_priv_buf;
void *hwaccel_picture_private; ///< Hardware accelerator private data

View File

@ -235,6 +235,12 @@ typedef struct MpegEncContext {
int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced B-frame encoding
uint8_t (*p_field_select_table[2]); ///< Only the first element is allocated
uint8_t (*b_field_select_table[2][2]); ///< Only the first element is allocated
/* The following three arrays are encoder-only */
uint16_t *mb_var; ///< Table for MB variances
uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
uint8_t *mb_mean; ///< Table for MB luminance
int motion_est; ///< ME algorithm
int me_penalty_compensation;
int me_pre; ///< prepass for motion estimation

View File

@ -313,6 +313,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
MpegEncContext *s = avctx->priv_data;
AVCPBProperties *cpb_props;
int i, ret;
int mb_array_size;
mpv_encode_defaults(s);
@ -823,6 +824,12 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
!FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
return AVERROR(ENOMEM);
mb_array_size = s->mb_stride * s->mb_height;
if (!FF_ALLOCZ_TYPED_ARRAY(s->mc_mb_var, mb_array_size) ||
!FF_ALLOCZ_TYPED_ARRAY(s->mb_var, mb_array_size) ||
!(s->mb_mean = av_mallocz(mb_array_size)))
return AVERROR(ENOMEM);
if (s->noise_reduction) {
if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
return AVERROR(ENOMEM);
@ -949,6 +956,9 @@ av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
av_freep(&s->input_picture);
av_freep(&s->reordered_input_picture);
av_freep(&s->dct_offset);
av_freep(&s->mb_var);
av_freep(&s->mc_mb_var);
av_freep(&s->mb_mean);
return 0;
}
@ -2235,8 +2245,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
}
}
/* pre quantization */
if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2 * s->qscale * s->qscale) {
if (s->mc_mb_var[s->mb_stride * mb_y + mb_x] < 2 * s->qscale * s->qscale) {
// FIXME optimize
if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
skip_dct[0] = 1;
@ -2648,8 +2657,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
(((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
s->mb_var [s->mb_stride * mb_y + mb_x] = varc;
s->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
s->me.mb_var_sum_temp += varc;
}
}

View File

@ -751,15 +751,14 @@ static void adaptive_quantization(MpegEncContext *s, double q)
float *bits_tab = s->bits_tab;
const int qmin = s->avctx->mb_lmin;
const int qmax = s->avctx->mb_lmax;
Picture *const pic = &s->current_picture;
const int mb_width = s->mb_width;
const int mb_height = s->mb_height;
for (i = 0; i < s->mb_num; i++) {
const int mb_xy = s->mb_index2xy[i];
float temp_cplx = sqrt(pic->mc_mb_var[mb_xy]); // FIXME merge in pow()
float spat_cplx = sqrt(pic->mb_var[mb_xy]);
const int lumi = pic->mb_mean[mb_xy];
float temp_cplx = sqrt(s->mc_mb_var[mb_xy]); // FIXME merge in pow()
float spat_cplx = sqrt(s->mb_var[mb_xy]);
const int lumi = s->mb_mean[mb_xy];
float bits, cplx, factor;
int mb_x = mb_xy % s->mb_stride;
int mb_y = mb_xy / s->mb_stride;

View File

@ -313,9 +313,9 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
s->m.mb_type = s->mb_type;
// dummies, to avoid segfaults
s->m.current_picture.mb_mean = (uint8_t *)s->dummy;
s->m.current_picture.mb_var = (uint16_t *)s->dummy;
s->m.current_picture.mc_mb_var = (uint16_t *)s->dummy;
s->m.mb_mean = (uint8_t *)s->dummy;
s->m.mb_var = (uint16_t *)s->dummy;
s->m.mc_mb_var = (uint16_t *)s->dummy;
s->m.current_picture.mb_type = s->dummy;
s->m.current_picture.motion_val[0] = s->motion_val8[plane] + 2;

View File

@ -2,45 +2,45 @@ ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st:-1 flags:0 ts:-1.000000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st:-1 flags:1 ts: 1.894167
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 174446 size: 16883
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 161318 size: 19176
ret: 0 st: 0 flags:0 ts: 0.800000
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 98220 size: 17063
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 75056 size: 19178
ret:-1 st: 0 flags:1 ts:-0.320000
ret:-1 st:-1 flags:0 ts: 2.576668
ret: 0 st:-1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 135582 size: 17525
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 118696 size: 20018
ret: 0 st: 0 flags:0 ts: 0.360000
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 59446 size: 17261
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 35800 size: 17261
ret:-1 st: 0 flags:1 ts:-0.760000
ret:-1 st:-1 flags:0 ts: 2.153336
ret: 0 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 98220 size: 17063
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 75056 size: 19178
ret: 0 st: 0 flags:0 ts:-0.040000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st: 0 flags:1 ts: 2.840000
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 174446 size: 16883
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 161318 size: 19176
ret: 0 st:-1 flags:0 ts: 1.730004
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 174446 size: 16883
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 161318 size: 19176
ret: 0 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 59446 size: 17261
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 35800 size: 17261
ret: 0 st: 0 flags:0 ts:-0.480000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st: 0 flags:1 ts: 2.400000
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 174446 size: 16883
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 161318 size: 19176
ret: 0 st:-1 flags:0 ts: 1.306672
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 135582 size: 17525
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 118696 size: 20018
ret: 0 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st: 0 flags:0 ts:-0.920000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 5652 size: 6855
ret: 0 st: 0 flags:1 ts: 2.000000
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 174446 size: 16883
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 161318 size: 19176
ret: 0 st:-1 flags:0 ts: 0.883340
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 98220 size: 17063
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 75056 size: 19178
ret:-1 st:-1 flags:1 ts:-0.222493
ret:-1 st: 0 flags:0 ts: 2.680000
ret: 0 st: 0 flags:1 ts: 1.560000
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 135582 size: 17525
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 118696 size: 20018
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 98220 size: 17063
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 75056 size: 19178
ret:-1 st:-1 flags:1 ts:-0.645825

View File

@ -1,4 +1,4 @@
f120f0bf976bb510c5b5305fe7d8159a *tests/data/fate/vsynth1-mpeg4-adap.avi
403436 tests/data/fate/vsynth1-mpeg4-adap.avi
fad0b9dc08fe4a95b297af1a7411c1e9 *tests/data/fate/vsynth1-mpeg4-adap.out.rawvideo
stddev: 14.05 PSNR: 25.17 MAXDIFF: 184 bytes: 7603200/ 7603200
0f1cbbdc3f9b91f2d9ac3d1fc2cf7d4e *tests/data/fate/vsynth1-mpeg4-adap.avi
325518 tests/data/fate/vsynth1-mpeg4-adap.avi
1e6c596f9f491fbf15920ef1bace7fb8 *tests/data/fate/vsynth1-mpeg4-adap.out.rawvideo
stddev: 14.12 PSNR: 25.13 MAXDIFF: 184 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
4bff98da2342836476da817428594403 *tests/data/fate/vsynth2-mpeg4-adap.avi
213508 tests/data/fate/vsynth2-mpeg4-adap.avi
0c709f2b81f4593eaa29490332c2cb39 *tests/data/fate/vsynth2-mpeg4-adap.out.rawvideo
stddev: 4.87 PSNR: 34.36 MAXDIFF: 86 bytes: 7603200/ 7603200
06a397fe43dab7b6cf56870410fbbbaf *tests/data/fate/vsynth2-mpeg4-adap.avi
203000 tests/data/fate/vsynth2-mpeg4-adap.avi
686565d42d8ba5aea790824b04fa0a18 *tests/data/fate/vsynth2-mpeg4-adap.out.rawvideo
stddev: 4.55 PSNR: 34.95 MAXDIFF: 84 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
c16e5c2436ca9953517eadba562768e9 *tests/data/fate/vsynth3-mpeg4-adap.avi
43706 tests/data/fate/vsynth3-mpeg4-adap.avi
b42b614e19e7c4859fca1af6d4e36eae *tests/data/fate/vsynth3-mpeg4-adap.out.rawvideo
stddev: 5.48 PSNR: 33.34 MAXDIFF: 53 bytes: 86700/ 86700
6b2f641f2e68b11b992fd6ba1ed66a21 *tests/data/fate/vsynth3-mpeg4-adap.avi
41012 tests/data/fate/vsynth3-mpeg4-adap.avi
3483a2032cb02c3a37f5e43b128e59ed *tests/data/fate/vsynth3-mpeg4-adap.out.rawvideo
stddev: 5.79 PSNR: 32.87 MAXDIFF: 49 bytes: 86700/ 86700

View File

@ -1,4 +1,4 @@
c6108621b1202d32dac68b1944c5b8c2 *tests/data/fate/vsynth_lena-mpeg4-adap.avi
198500 tests/data/fate/vsynth_lena-mpeg4-adap.avi
87b6dbe98d276137fceaae2fa672eced *tests/data/fate/vsynth_lena-mpeg4-adap.out.rawvideo
stddev: 3.75 PSNR: 36.65 MAXDIFF: 71 bytes: 7603200/ 7603200
633da125f46391eef33bb031cd728f4b *tests/data/fate/vsynth_lena-mpeg4-adap.avi
187598 tests/data/fate/vsynth_lena-mpeg4-adap.avi
21312bfcb28c40299fb27a5b03477f8c *tests/data/fate/vsynth_lena-mpeg4-adap.out.rawvideo
stddev: 3.63 PSNR: 36.92 MAXDIFF: 71 bytes: 7603200/ 7603200