mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-06 07:10:21 +00:00
vp9: increase buffer sizes for non-420 chroma subsamplings.
This commit is contained in:
parent
a03b057ede
commit
2d0bea4719
@ -215,7 +215,7 @@ typedef struct VP9Context {
|
||||
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
|
||||
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
|
||||
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
|
||||
DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
|
||||
DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
|
||||
DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
|
||||
DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
|
||||
DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
|
||||
@ -248,8 +248,8 @@ typedef struct VP9Context {
|
||||
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
|
||||
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
|
||||
struct { int x, y; } min_mv, max_mv;
|
||||
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
|
||||
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
|
||||
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
|
||||
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
|
||||
uint16_t mvscale[3][2];
|
||||
uint8_t mvstep[3][2];
|
||||
} VP9Context;
|
||||
@ -307,39 +307,42 @@ static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_size(AVCodecContext *ctx, int w, int h)
|
||||
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
|
||||
{
|
||||
VP9Context *s = ctx->priv_data;
|
||||
uint8_t *p;
|
||||
|
||||
av_assert0(w > 0 && h > 0);
|
||||
|
||||
if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
|
||||
if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
|
||||
return 0;
|
||||
|
||||
ctx->width = w;
|
||||
ctx->height = h;
|
||||
s->sb_cols = (w + 63) >> 6;
|
||||
s->sb_rows = (h + 63) >> 6;
|
||||
s->cols = (w + 7) >> 3;
|
||||
s->rows = (h + 7) >> 3;
|
||||
ctx->width = w;
|
||||
ctx->height = h;
|
||||
ctx->pix_fmt = fmt;
|
||||
s->sb_cols = (w + 63) >> 6;
|
||||
s->sb_rows = (h + 63) >> 6;
|
||||
s->cols = (w + 7) >> 3;
|
||||
s->rows = (h + 7) >> 3;
|
||||
|
||||
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
|
||||
av_freep(&s->intra_pred_data[0]);
|
||||
p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
|
||||
// FIXME we slightly over-allocate here for subsampled chroma, but a little
|
||||
// bit of padding shouldn't affect performance...
|
||||
p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
|
||||
if (!p)
|
||||
return AVERROR(ENOMEM);
|
||||
assign(s->intra_pred_data[0], uint8_t *, 64);
|
||||
assign(s->intra_pred_data[1], uint8_t *, 32);
|
||||
assign(s->intra_pred_data[2], uint8_t *, 32);
|
||||
assign(s->intra_pred_data[1], uint8_t *, 64);
|
||||
assign(s->intra_pred_data[2], uint8_t *, 64);
|
||||
assign(s->above_y_nnz_ctx, uint8_t *, 16);
|
||||
assign(s->above_mode_ctx, uint8_t *, 16);
|
||||
assign(s->above_mv_ctx, VP56mv(*)[2], 16);
|
||||
assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
|
||||
assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
|
||||
assign(s->above_partition_ctx, uint8_t *, 8);
|
||||
assign(s->above_skip_ctx, uint8_t *, 8);
|
||||
assign(s->above_txfm_ctx, uint8_t *, 8);
|
||||
assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
|
||||
assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
|
||||
assign(s->above_segpred_ctx, uint8_t *, 8);
|
||||
assign(s->above_intra_ctx, uint8_t *, 8);
|
||||
assign(s->above_comp_ctx, uint8_t *, 8);
|
||||
@ -358,34 +361,39 @@ static int update_size(AVCodecContext *ctx, int w, int h)
|
||||
static int update_block_buffers(AVCodecContext *ctx)
|
||||
{
|
||||
VP9Context *s = ctx->priv_data;
|
||||
int chroma_blocks, chroma_eobs;
|
||||
|
||||
if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
|
||||
return 0;
|
||||
|
||||
av_free(s->b_base);
|
||||
av_free(s->block_base);
|
||||
chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
|
||||
chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
|
||||
if (s->frames[CUR_FRAME].uses_2pass) {
|
||||
int sbs = s->sb_cols * s->sb_rows;
|
||||
|
||||
s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
|
||||
s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
|
||||
s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
|
||||
16 * 16 + 2 * chroma_eobs) * sbs);
|
||||
if (!s->b_base || !s->block_base)
|
||||
return AVERROR(ENOMEM);
|
||||
s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
|
||||
s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
|
||||
s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
|
||||
s->uveob_base[0] = s->eob_base + 256 * sbs;
|
||||
s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
|
||||
s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
|
||||
s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
|
||||
s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
|
||||
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
|
||||
} else {
|
||||
s->b_base = av_malloc(sizeof(VP9Block));
|
||||
s->block_base = av_mallocz((64 * 64 + 128) * 3);
|
||||
s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
|
||||
16 * 16 + 2 * chroma_eobs);
|
||||
if (!s->b_base || !s->block_base)
|
||||
return AVERROR(ENOMEM);
|
||||
s->uvblock_base[0] = s->block_base + 64 * 64;
|
||||
s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
|
||||
s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
|
||||
s->uveob_base[0] = s->eob_base + 256;
|
||||
s->uveob_base[1] = s->uveob_base[0] + 64;
|
||||
s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
|
||||
s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
|
||||
s->uveob_base[0] = s->eob_base + 16 * 16;
|
||||
s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
|
||||
}
|
||||
s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
|
||||
|
||||
@ -772,8 +780,8 @@ static int decode_frame_header(AVCodecContext *ctx,
|
||||
}
|
||||
|
||||
/* tiling info */
|
||||
if ((res = update_size(ctx, w, h)) < 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
|
||||
if ((res = update_size(ctx, w, h, fmt)) < 0) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
|
||||
return res;
|
||||
}
|
||||
for (s->tiling.log2_tile_cols = 0;
|
||||
@ -3960,7 +3968,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
|
||||
memset(s->left_mode_ctx, NEARESTMV, 8);
|
||||
}
|
||||
memset(s->left_y_nnz_ctx, 0, 16);
|
||||
memset(s->left_uv_nnz_ctx, 0, 16);
|
||||
memset(s->left_uv_nnz_ctx, 0, 32);
|
||||
memset(s->left_segpred_ctx, 0, 8);
|
||||
|
||||
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
|
||||
@ -4089,7 +4097,6 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
|
||||
VP9Context *s = ctx->priv_data;
|
||||
|
||||
ctx->internal->allocate_progress = 1;
|
||||
ctx->pix_fmt = AV_PIX_FMT_YUV420P;
|
||||
ff_vp9dsp_init(&s->dsp);
|
||||
ff_videodsp_init(&s->vdsp, 8);
|
||||
s->filter.sharpness = -1;
|
||||
|
Loading…
Reference in New Issue
Block a user