avcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()

For 422 frames we should not use hard coded 8 to calculate mb size for
uv plane. Chroma shift should be taken into consideration to be
compatiple with different sampling format.

The error is reported by fate test when av_cpu_max_align() return 64
on the platform supporting AVX512. This is a hidden error and it is
exposed after commit 17a59a634c.

mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum of
squared error) on current mb, reconstructed mb will be wrote to the
previous mb space, so that the memory can be saved. However if the align
is 64, the frame is shared in somewhere else, so the frame cannot be
reused and a new frame to store reconstrued data is created. Because the
height of mb is wrong when compute sse on 422 frame, starting from the
second line of macro block, changed data is read when frame is reused
(we need to read row 16 rather than row 8 if frame is 422), and unchanged
data is read when frame is not reused (a new frame is created so the
original frame will not be changed).

That is why commit 17a59a634c exposes this
issue, because it add av_cpu_max_align() and this function return 64 on
platform supporting AVX512 which lead to creating a frame in mpeg2enc,
and this lead to the different outputs.

Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
Signed-off-by: Marton Balint <cus@passwd.hu>
This commit is contained in:
Wenbin Chen 2022-07-04 11:17:34 +08:00 committed by Marton Balint
parent 8393c0721a
commit 76b33704b6
6 changed files with 50 additions and 42 deletions

View File

@ -2558,6 +2558,8 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
static int sse_mb(MpegEncContext *s){
int w= 16;
int h= 16;
int chroma_mb_w = w >> s->chroma_x_shift;
int chroma_mb_h = h >> s->chroma_y_shift;
if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
@ -2565,17 +2567,23 @@ static int sse_mb(MpegEncContext *s){
if(w==16 && h==16)
if(s->avctx->mb_cmp == FF_CMP_NSSE){
return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16, s->dest[0], s->linesize, 16) +
s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x * 8 + s->mb_y * s->uvlinesize * 8, s->dest[1], s->uvlinesize, 8) +
s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x * 8 + s->mb_y * s->uvlinesize * 8, s->dest[2], s->uvlinesize, 8);
s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[1], s->uvlinesize, chroma_mb_h) +
s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[2], s->uvlinesize, chroma_mb_h);
}else{
return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16, s->dest[0], s->linesize, 16) +
s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x * 8 + s->mb_y * s->uvlinesize * 8, s->dest[1], s->uvlinesize, 8) +
s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x * 8 + s->mb_y * s->uvlinesize * 8, s->dest[2], s->uvlinesize, 8);
s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[1], s->uvlinesize, chroma_mb_h) +
s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[2], s->uvlinesize, chroma_mb_h);
}
else
return sse(s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
+sse(s, s->new_picture->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
+sse(s, s->new_picture->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
+sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[1], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize)
+sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
s->dest[2], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize);
}
static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){

View File

@ -1,46 +1,46 @@
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st:-1 flags:0 ts:-1.000000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st:-1 flags:1 ts: 1.894167
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 325397 size: 19967
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 329242 size: 21479
ret: 0 st: 0 flags:0 ts: 0.788334
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 200747 size: 22575
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 203086 size: 24774
ret:-1 st: 0 flags:1 ts:-0.317499
ret:-1 st:-1 flags:0 ts: 2.576668
ret: 0 st:-1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 265466 size: 21329
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 268154 size: 22845
ret: 0 st: 0 flags:0 ts: 0.365002
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 104454 size: 28984
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 108763 size: 32204
ret:-1 st: 0 flags:1 ts:-0.740831
ret:-1 st:-1 flags:0 ts: 2.153336
ret: 0 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 200747 size: 22575
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 203086 size: 24774
ret: 0 st: 0 flags:0 ts:-0.058330
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st: 0 flags:1 ts: 2.835837
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 325397 size: 19967
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 329242 size: 21479
ret: 0 st:-1 flags:0 ts: 1.730004
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 325397 size: 19967
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 329242 size: 21479
ret: 0 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 104454 size: 28984
ret: 0 st: 0 flags:1 dts: 0.400000 pts: NOPTS pos: 108763 size: 32204
ret: 0 st: 0 flags:0 ts:-0.481662
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st: 0 flags:1 ts: 2.412505
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 325397 size: 19967
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 329242 size: 21479
ret: 0 st:-1 flags:0 ts: 1.306672
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 265466 size: 21329
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 268154 size: 22845
ret: 0 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st: 0 flags:0 ts:-0.904994
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 17497
ret: 0 st: 0 flags:1 dts: 0.000000 pts: NOPTS pos: 0 size: 18339
ret: 0 st: 0 flags:1 ts: 1.989173
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 325397 size: 19967
ret: 0 st: 0 flags:1 dts: 1.840000 pts: NOPTS pos: 329242 size: 21479
ret: 0 st:-1 flags:0 ts: 0.883340
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 265466 size: 21329
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 268154 size: 22845
ret:-1 st:-1 flags:1 ts:-0.222493
ret:-1 st: 0 flags:0 ts: 2.671674
ret: 0 st: 0 flags:1 ts: 1.565841
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 265466 size: 21329
ret: 0 st: 0 flags:1 dts: 1.360000 pts: NOPTS pos: 268154 size: 22845
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 200747 size: 22575
ret: 0 st: 0 flags:1 dts: 0.880000 pts: NOPTS pos: 203086 size: 24774
ret:-1 st:-1 flags:1 ts:-0.645825

View File

@ -1,4 +1,4 @@
6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-422.mpeg2video
730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-422.out.rawvideo
stddev: 10.27 PSNR: 27.90 MAXDIFF: 162 bytes: 7603200/ 7603200
651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-422.mpeg2video
855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-422.out.rawvideo
stddev: 9.38 PSNR: 28.68 MAXDIFF: 178 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-422.mpeg2video
379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-422.out.rawvideo
stddev: 4.17 PSNR: 35.73 MAXDIFF: 70 bytes: 7603200/ 7603200
fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-422.mpeg2video
383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-422.out.rawvideo
stddev: 3.93 PSNR: 36.23 MAXDIFF: 69 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-422.mpeg2video
68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-422.out.rawvideo
stddev: 4.02 PSNR: 36.05 MAXDIFF: 46 bytes: 86700/ 86700
96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-422.mpeg2video
75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-422.out.rawvideo
stddev: 3.24 PSNR: 37.90 MAXDIFF: 27 bytes: 86700/ 86700

View File

@ -1,4 +1,4 @@
521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-mpeg2-422.out.rawvideo
stddev: 3.15 PSNR: 38.14 MAXDIFF: 49 bytes: 7603200/ 7603200
87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-mpeg2-422.out.rawvideo
stddev: 2.97 PSNR: 38.67 MAXDIFF: 57 bytes: 7603200/ 7603200