From 0b0953baecc5b0f8a85169c6bcc3db69a73f2205 Mon Sep 17 00:00:00 2001 From: Kostya Shishkov Date: Sat, 18 May 2013 12:07:30 +0200 Subject: [PATCH] proresenc: alpha coding support --- doc/encoders.texi | 6 + libavcodec/proresenc.c | 241 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 226 insertions(+), 21 deletions(-) diff --git a/doc/encoders.texi b/doc/encoders.texi index 8db148123b..abf5dbfb91 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -608,6 +608,7 @@ Select the ProRes profile to encode @item lt @item standard @item hq +@item 4444 @end table @item quant_mat @var{integer} @@ -637,6 +638,11 @@ Override the 4-byte vendor ID. A custom vendor ID like @var{apl0} would claim the stream was produced by the Apple encoder. +@item alpha_bits @var{integer} +Specify number of bits for alpha component. +Possible values are @var{0}, @var{8} and @var{16}. +Use @var{0} to disable alpha plane coding. + @end table @subsection Speed considerations diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc.c index 5a8cea8e50..d989f8c228 100644 --- a/libavcodec/proresenc.c +++ b/libavcodec/proresenc.c @@ -21,6 +21,7 @@ */ #include "libavutil/opt.h" +#include "libavutil/pixdesc.h" #include "avcodec.h" #include "dsputil.h" #include "put_bits.h" @@ -34,13 +35,14 @@ #define MAX_MBS_PER_SLICE 8 -#define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10 +#define MAX_PLANES 4 enum { PRORES_PROFILE_PROXY = 0, PRORES_PROFILE_LT, PRORES_PROFILE_STANDARD, PRORES_PROFILE_HQ, + PRORES_PROFILE_4444, }; enum { @@ -119,7 +121,7 @@ static const struct prores_profile { int max_quant; int br_tab[NUM_MB_LIMITS]; int quant; -} prores_profile_info[4] = { +} prores_profile_info[5] = { { .full_name = "proxy", .tag = MKTAG('a', 'p', 'c', 'o'), @@ -151,8 +153,15 @@ static const struct prores_profile { .max_quant = 6, .br_tab = { 1566, 1216, 1070, 950 }, .quant = QUANT_MAT_HQ, + }, + { + .full_name = "4444", + .tag = MKTAG('a', 'p', '4', 'h'), + .min_quant = 1, + .max_quant = 6, + .br_tab = { 2350, 1828, 1600, 1425 }, + .quant = QUANT_MAT_HQ, } -// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 } }; #define TRELLIS_WIDTH 16 @@ -195,6 +204,7 @@ typedef struct ProresContext { int num_planes; int bits_per_mb; int force_quant; + int alpha_bits; char *vendor; int quant_sel; @@ -280,6 +290,34 @@ static void get_slice_data(ProresContext *ctx, const uint16_t *src, } } +static void get_alpha_data(ProresContext *ctx, const uint16_t *src, + int linesize, int x, int y, int w, int h, + int16_t *blocks, int mbs_per_slice, int abits) +{ + const int slice_width = 16 * mbs_per_slice; + int i, j, copy_w, copy_h; + + copy_w = FFMIN(w - x, slice_width); + copy_h = FFMIN(h - y, 16); + for (i = 0; i < copy_h; i++) { + memcpy(blocks, src, copy_w * sizeof(*src)); + if (abits == 8) + for (j = 0; j < copy_w; j++) + blocks[j] >>= 2; + else + for (j = 0; j < copy_w; j++) + blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4); + for (j = copy_w; j < slice_width; j++) + blocks[j] = blocks[copy_w - 1]; + blocks += slice_width; + src += linesize >> 1; + } + for (; i < 16; i++) { + memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks)); + blocks += slice_width; + } +} + /** * Write an unsigned rice/exp golomb codeword. */ @@ -394,6 +432,73 @@ static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb, return (put_bits_count(pb) - saved_pos) >> 3; } +static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits) +{ + const int mask = (1 << abits) - 1; + const int dbits = (abits == 8) ? 4 : 7; + const int dsize = 1 << dbits - 1; + int diff = cur - prev; + + diff &= mask; + if (diff >= (1 << abits) - dsize) + diff -= 1 << abits; + if (diff < -dsize || diff > dsize || !diff) { + put_bits(pb, 1, 1); + put_bits(pb, abits, diff); + } else { + put_bits(pb, 1, 0); + put_bits(pb, dbits - 1, FFABS(diff) - 1); + put_bits(pb, 1, diff < 0); + } +} + +static void put_alpha_run(PutBitContext *pb, int run) +{ + if (run) { + put_bits(pb, 1, 0); + if (run < 0x10) + put_bits(pb, 4, run); + else + put_bits(pb, 15, run); + } else { + put_bits(pb, 1, 1); + } +} + +// todo alpha quantisation for high quants +static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb, + const uint16_t *src, int linesize, + int mbs_per_slice, uint16_t *blocks, + int quant) +{ + const int abits = ctx->alpha_bits; + const int mask = (1 << abits) - 1; + const int num_coeffs = mbs_per_slice * 256; + int saved_pos = put_bits_count(pb); + int prev = mask, cur; + int idx = 0; + int run = 0; + + cur = blocks[idx++]; + put_alpha_diff(pb, cur, prev, abits); + prev = cur; + do { + cur = blocks[idx++]; + if (cur != prev) { + put_alpha_run (pb, run); + put_alpha_diff(pb, cur, prev, abits); + prev = cur; + run = 0; + } else { + run++; + } + } while (idx < num_coeffs); + if (run) + put_alpha_run(pb, run); + flush_put_bits(pb); + return (put_bits_count(pb) - saved_pos) >> 3; +} + static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, PutBitContext *pb, int sizes[4], int x, int y, int quant, @@ -444,14 +549,23 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, src = (const uint16_t*)(pic->data[i] + yp * linesize + line_add * pic->linesize[i]) + xp; - get_slice_data(ctx, src, linesize, xp, yp, - pwidth, avctx->height / ctx->pictures_per_frame, - ctx->blocks[0], ctx->emu_buf, - mbs_per_slice, num_cblocks, is_chroma); - sizes[i] = encode_slice_plane(ctx, pb, src, linesize, - mbs_per_slice, ctx->blocks[0], - num_cblocks, plane_factor, - qmat); + if (i < 3) { + get_slice_data(ctx, src, linesize, xp, yp, + pwidth, avctx->height / ctx->pictures_per_frame, + ctx->blocks[0], ctx->emu_buf, + mbs_per_slice, num_cblocks, is_chroma); + sizes[i] = encode_slice_plane(ctx, pb, src, linesize, + mbs_per_slice, ctx->blocks[0], + num_cblocks, plane_factor, + qmat); + } else { + get_alpha_data(ctx, src, linesize, xp, yp, + pwidth, avctx->height / ctx->pictures_per_frame, + ctx->blocks[0], mbs_per_slice, ctx->alpha_bits); + sizes[i] = encode_alpha_plane(ctx, pb, src, linesize, + mbs_per_slice, ctx->blocks[0], + quant); + } total_size += sizes[i]; } return total_size; @@ -564,6 +678,66 @@ static int estimate_slice_plane(ProresContext *ctx, int *error, int plane, return FFALIGN(bits, 8); } +static int est_alpha_diff(int cur, int prev, int abits) +{ + const int mask = (1 << abits) - 1; + const int dbits = (abits == 8) ? 4 : 7; + const int dsize = 1 << dbits - 1; + int diff = cur - prev; + + diff &= mask; + if (diff >= (1 << abits) - dsize) + diff -= 1 << abits; + if (diff < -dsize || diff > dsize || !diff) + return abits + 1; + else + return dbits + 1; +} + +static int estimate_alpha_plane(ProresContext *ctx, int *error, + const uint16_t *src, int linesize, + int mbs_per_slice, int quant, + int16_t *blocks) +{ + const int abits = ctx->alpha_bits; + const int mask = (1 << abits) - 1; + const int num_coeffs = mbs_per_slice * 256; + int prev = mask, cur; + int idx = 0; + int run = 0; + int bits; + + *error = 0; + cur = blocks[idx++]; + bits = est_alpha_diff(cur, prev, abits); + prev = cur; + do { + cur = blocks[idx++]; + if (cur != prev) { + if (!run) + bits++; + else if (run < 0x10) + bits += 4; + else + bits += 15; + bits += est_alpha_diff(cur, prev, abits); + prev = cur; + run = 0; + } else { + run++; + } + } while (idx < num_coeffs); + + if (run) { + if (run < 0x10) + bits += 4; + else + bits += 15; + } + + return bits; +} + static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic, int trellis_node, int x, int y, int mbs_per_slice, ProresThreadData *td) @@ -610,10 +784,16 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic, src = (const uint16_t*)(pic->data[i] + yp * linesize[i] + line_add * pic->linesize[i]) + xp; - get_slice_data(ctx, src, linesize[i], xp, yp, - pwidth, avctx->height / ctx->pictures_per_frame, - td->blocks[i], td->emu_buf, - mbs_per_slice, num_cblocks[i], is_chroma[i]); + if (i < 3) { + get_slice_data(ctx, src, linesize[i], xp, yp, + pwidth, avctx->height / ctx->pictures_per_frame, + td->blocks[i], td->emu_buf, + mbs_per_slice, num_cblocks[i], is_chroma[i]); + } else { + get_alpha_data(ctx, src, linesize[i], xp, yp, + pwidth, avctx->height / ctx->pictures_per_frame, + td->blocks[i], mbs_per_slice, ctx->alpha_bits); + } } for (q = min_quant; q < max_quant + 2; q++) { @@ -625,13 +805,16 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic, for (q = min_quant; q <= max_quant; q++) { bits = 0; error = 0; - for (i = 0; i < ctx->num_planes; i++) { + for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) { bits += estimate_slice_plane(ctx, &error, i, src, linesize[i], mbs_per_slice, num_cblocks[i], plane_factor[i], ctx->quants[q], td); } + if (ctx->alpha_bits) + bits += estimate_alpha_plane(ctx, &error, src, linesize[3], + mbs_per_slice, q, td->blocks[3]); if (bits > 65000 * 8) { error = SCORE_LIMIT; break; @@ -654,13 +837,16 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic, for (i = 0; i < 64; i++) qmat[i] = ctx->quant_mat[i] * q; } - for (i = 0; i < ctx->num_planes; i++) { + for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) { bits += estimate_slice_plane(ctx, &error, i, src, linesize[i], mbs_per_slice, num_cblocks[i], plane_factor[i], qmat, td); } + if (ctx->alpha_bits) + bits += estimate_alpha_plane(ctx, &error, src, linesize[3], + mbs_per_slice, q, td->blocks[3]); if (bits <= ctx->bits_per_mb * mbs_per_slice) break; } @@ -782,7 +968,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, bytestream_put_byte (&buf, avctx->color_primaries); bytestream_put_byte (&buf, avctx->color_trc); bytestream_put_byte (&buf, avctx->colorspace); - bytestream_put_byte (&buf, 0x40); // source format and alpha information + bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3)); bytestream_put_byte (&buf, 0); // reserved if (ctx->quant_sel != QUANT_MAT_DEFAULT) { bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present @@ -904,12 +1090,20 @@ static av_cold int encode_init(AVCodecContext *avctx) "there should be an integer power of two MBs per slice\n"); return AVERROR(EINVAL); } + if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) { + if (ctx->alpha_bits & 7) { + av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n"); + return AVERROR(EINVAL); + } + } else { + ctx->alpha_bits = 0; + } ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10 ? CFACTOR_Y422 : CFACTOR_Y444; ctx->profile_info = prores_profile_info + ctx->profile; - ctx->num_planes = 3; + ctx->num_planes = 3 + !!ctx->alpha_bits; ctx->mb_width = FFALIGN(avctx->width, 16) >> 4; @@ -1025,7 +1219,7 @@ static const AVOption options[] = { AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE }, { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = PRORES_PROFILE_STANDARD }, - PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" }, + PRORES_PROFILE_PROXY, PRORES_PROFILE_4444, VE, "profile" }, { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY }, 0, 0, VE, "profile" }, { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT }, @@ -1034,6 +1228,8 @@ static const AVOption options[] = { 0, 0, VE, "profile" }, { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ }, 0, 0, VE, "profile" }, + { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 }, + 0, 0, VE, "profile" }, { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE }, { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb), @@ -1052,6 +1248,8 @@ static const AVOption options[] = { 0, 0, VE, "quant_mat" }, { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT }, 0, 0, VE, "quant_mat" }, + { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT, + { .i64 = 16 }, 0, 16, VE }, { NULL } }; @@ -1073,7 +1271,8 @@ AVCodec ff_prores_encoder = { .capabilities = CODEC_CAP_SLICE_THREADS, .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"), .pix_fmts = (const enum AVPixelFormat[]) { - AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE + AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, + AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE }, .priv_class = &proresenc_class, };