diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 7989ca8d14..32d995f4a1 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -1280,125 +1280,125 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame, do { ptrdiff_t yoff = 0, uvoff = 0; - s->b = s->b_base; - s->block = s->block_base; - s->uvblock[0] = s->uvblock_base[0]; - s->uvblock[1] = s->uvblock_base[1]; - s->eob = s->eob_base; - s->uveob[0] = s->uveob_base[0]; - s->uveob[1] = s->uveob_base[1]; + s->b = s->b_base; + s->block = s->block_base; + s->uvblock[0] = s->uvblock_base[0]; + s->uvblock[1] = s->uvblock_base[1]; + s->eob = s->eob_base; + s->uveob[0] = s->uveob_base[0]; + s->uveob[1] = s->uveob_base[1]; - for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) { - set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end, - tile_row, s->tiling.log2_tile_rows, s->sb_rows); + for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) { + set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end, + tile_row, s->tiling.log2_tile_rows, s->sb_rows); - if (s->pass != 2) { - for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) { - int64_t tile_size; + if (s->pass != 2) { + for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) { + int64_t tile_size; - if (tile_col == s->tiling.tile_cols - 1 && - tile_row == s->tiling.tile_rows - 1) { - tile_size = size; - } else { - tile_size = AV_RB32(data); - data += 4; - size -= 4; - } - if (tile_size > size) { - ret = AVERROR_INVALIDDATA; - goto fail; - } - ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size); - if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit - ret = AVERROR_INVALIDDATA; - goto fail; - } - data += tile_size; - size -= tile_size; - } - } - - for (row = s->tiling.tile_row_start; - row < s->tiling.tile_row_end; - row += 8, yoff += f->linesize[0] * 64, - uvoff += f->linesize[1] * 32) { - VP9Filter *lflvl = s->lflvl; - ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; - - for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) { - set_tile_offset(&s->tiling.tile_col_start, - &s->tiling.tile_col_end, - tile_col, s->tiling.log2_tile_cols, s->sb_cols); - - memset(s->left_partition_ctx, 0, 8); - memset(s->left_skip_ctx, 0, 8); - if (s->keyframe || s->intraonly) - memset(s->left_mode_ctx, DC_PRED, 16); - else - memset(s->left_mode_ctx, NEARESTMV, 8); - memset(s->left_y_nnz_ctx, 0, 16); - memset(s->left_uv_nnz_ctx, 0, 16); - memset(s->left_segpred_ctx, 0, 8); - - memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c)); - for (col = s->tiling.tile_col_start; - col < s->tiling.tile_col_end; - col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) { - // FIXME integrate with lf code (i.e. zero after each - // use, similar to invtxfm coefficients, or similar) - if (s->pass != 1) - memset(lflvl->mask, 0, sizeof(lflvl->mask)); - - if (s->pass == 2) { - ret = decode_superblock_mem(avctx, row, col, lflvl, - yoff2, uvoff2, BL_64X64); + if (tile_col == s->tiling.tile_cols - 1 && + tile_row == s->tiling.tile_rows - 1) { + tile_size = size; } else { - ret = decode_subblock(avctx, row, col, lflvl, - yoff2, uvoff2, BL_64X64); + tile_size = AV_RB32(data); + data += 4; + size -= 4; } - if (ret < 0) + if (tile_size > size) { + ret = AVERROR_INVALIDDATA; goto fail; + } + ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size); + if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit + ret = AVERROR_INVALIDDATA; + goto fail; + } + data += tile_size; + size -= tile_size; } - if (s->pass != 2) - memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c)); } - if (s->pass == 1) - continue; + for (row = s->tiling.tile_row_start; + row < s->tiling.tile_row_end; + row += 8, yoff += f->linesize[0] * 64, + uvoff += f->linesize[1] * 32) { + VP9Filter *lflvl = s->lflvl; + ptrdiff_t yoff2 = yoff, uvoff2 = uvoff; - // backup pre-loopfilter reconstruction data for intra - // prediction of next row of sb64s - if (row + 8 < s->rows) { - memcpy(s->intra_pred_data[0], - f->data[0] + yoff + - 63 * f->linesize[0], - 8 * s->cols); - memcpy(s->intra_pred_data[1], - f->data[1] + uvoff + - 31 * f->linesize[1], - 4 * s->cols); - memcpy(s->intra_pred_data[2], - f->data[2] + uvoff + - 31 * f->linesize[2], - 4 * s->cols); + for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) { + set_tile_offset(&s->tiling.tile_col_start, + &s->tiling.tile_col_end, + tile_col, s->tiling.log2_tile_cols, s->sb_cols); + + memset(s->left_partition_ctx, 0, 8); + memset(s->left_skip_ctx, 0, 8); + if (s->keyframe || s->intraonly) + memset(s->left_mode_ctx, DC_PRED, 16); + else + memset(s->left_mode_ctx, NEARESTMV, 8); + memset(s->left_y_nnz_ctx, 0, 16); + memset(s->left_uv_nnz_ctx, 0, 16); + memset(s->left_segpred_ctx, 0, 8); + + memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c)); + for (col = s->tiling.tile_col_start; + col < s->tiling.tile_col_end; + col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) { + // FIXME integrate with lf code (i.e. zero after each + // use, similar to invtxfm coefficients, or similar) + if (s->pass != 1) + memset(lflvl->mask, 0, sizeof(lflvl->mask)); + + if (s->pass == 2) { + ret = decode_superblock_mem(avctx, row, col, lflvl, + yoff2, uvoff2, BL_64X64); + } else { + ret = decode_subblock(avctx, row, col, lflvl, + yoff2, uvoff2, BL_64X64); + } + if (ret < 0) + goto fail; + } + if (s->pass != 2) + memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c)); + } + + if (s->pass == 1) + continue; + + // backup pre-loopfilter reconstruction data for intra + // prediction of next row of sb64s + if (row + 8 < s->rows) { + memcpy(s->intra_pred_data[0], + f->data[0] + yoff + + 63 * f->linesize[0], + 8 * s->cols); + memcpy(s->intra_pred_data[1], + f->data[1] + uvoff + + 31 * f->linesize[1], + 4 * s->cols); + memcpy(s->intra_pred_data[2], + f->data[2] + uvoff + + 31 * f->linesize[2], + 4 * s->cols); + } + + // loopfilter one row + if (s->filter.level) { + yoff2 = yoff; + uvoff2 = uvoff; + lflvl = s->lflvl; + for (col = 0; col < s->cols; + col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) + loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2); + } + + // FIXME maybe we can make this more finegrained by running the + // loopfilter per-block instead of after each sbrow + // In fact that would also make intra pred left preparation easier? + ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0); } - - // loopfilter one row - if (s->filter.level) { - yoff2 = yoff; - uvoff2 = uvoff; - lflvl = s->lflvl; - for (col = 0; col < s->cols; - col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) - loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2); - } - - // FIXME maybe we can make this more finegrained by running the - // loopfilter per-block instead of after each sbrow - // In fact that would also make intra pred left preparation easier? - ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0); } - } if (s->pass < 2 && s->refreshctx && !s->parallelmode) { ff_vp9_adapt_probs(s); @@ -1584,16 +1584,16 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo } AVCodec ff_vp9_decoder = { - .name = "vp9", - .long_name = NULL_IF_CONFIG_SMALL("Google VP9"), - .type = AVMEDIA_TYPE_VIDEO, - .id = AV_CODEC_ID_VP9, - .priv_data_size = sizeof(VP9Context), - .init = vp9_decode_init, - .decode = vp9_decode_packet, - .flush = vp9_decode_flush, - .close = vp9_decode_free, - .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, + .name = "vp9", + .long_name = NULL_IF_CONFIG_SMALL("Google VP9"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_VP9, + .priv_data_size = sizeof(VP9Context), + .init = vp9_decode_init, + .decode = vp9_decode_packet, + .flush = vp9_decode_flush, + .close = vp9_decode_free, + .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, .init_thread_copy = vp9_decode_init, .update_thread_context = vp9_decode_update_thread_context, };