From bc8662984261f422b49fdfff4cb273adde06917a Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 6 Sep 2020 13:44:52 +0200 Subject: [PATCH] avcodec/pixlet: postprocess luma using precalculated lut Realtime decoding speed raises from 1.08 to 1.84 for 1504x846, 25391 kb/s, 24 fps video. --- libavcodec/pixlet.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/libavcodec/pixlet.c b/libavcodec/pixlet.c index 78f571cd5f..42acd683fa 100644 --- a/libavcodec/pixlet.c +++ b/libavcodec/pixlet.c @@ -58,6 +58,7 @@ typedef struct PixletContext { int16_t *filter[2]; int16_t *prediction; int64_t scaling[4][2][NB_LEVELS]; + uint16_t lut[65536]; SubBand band[4][NB_LEVELS * 3 + 1]; } PixletContext; @@ -462,11 +463,27 @@ static void reconstruction(AVCodecContext *avctx, int16_t *dest, } } -static void postprocess_luma(AVFrame *frame, int w, int h, int depth) +static void build_luma_lut(AVCodecContext *avctx, int depth) { + PixletContext *ctx = avctx->priv_data; + int max = (1 << depth) - 1; + + if (ctx->depth == depth) + return; + ctx->depth = depth; + + for (int i = 0; i < FF_ARRAY_ELEMS(ctx->lut); i++) + ctx->lut[i] = ((int64_t)i * i * 65535LL) / max / max; +} + +static void postprocess_luma(AVCodecContext *avctx, AVFrame *frame, + int w, int h, int depth) +{ + PixletContext *ctx = avctx->priv_data; uint16_t *dsty = (uint16_t *)frame->data[0]; int16_t *srcy = (int16_t *)frame->data[0]; ptrdiff_t stridey = frame->linesize[0] / 2; + uint16_t *lut = ctx->lut; int i, j; for (j = 0; j < h; j++) { @@ -476,8 +493,7 @@ static void postprocess_luma(AVFrame *frame, int w, int h, int depth) else if (srcy[i] > ((1 << depth) - 1)) dsty[i] = 65535; else - dsty[i] = ((int64_t) srcy[i] * srcy[i] * 65535) / - ((1 << depth) - 1) / ((1 << depth) - 1); + dsty[i] = lut[srcy[i]]; } dsty += stridey; srcy += stridey; @@ -591,7 +607,7 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data, int i, w, h, width, height, ret, version; AVFrame *p = data; ThreadFrame frame = { .f = data }; - uint32_t pktsize; + uint32_t pktsize, depth; bytestream2_init(&ctx->gb, avpkt->data, avpkt->size); @@ -623,12 +639,14 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data, ctx->levels = bytestream2_get_be32(&ctx->gb); if (ctx->levels != NB_LEVELS) return AVERROR_INVALIDDATA; - ctx->depth = bytestream2_get_be32(&ctx->gb); - if (ctx->depth < 8 || ctx->depth > 15) { - avpriv_request_sample(avctx, "Depth %d", ctx->depth); + depth = bytestream2_get_be32(&ctx->gb); + if (depth < 8 || depth > 15) { + avpriv_request_sample(avctx, "Depth %d", depth); return AVERROR_INVALIDDATA; } + build_luma_lut(avctx, depth); + ret = ff_set_dimensions(avctx, w, h); if (ret < 0) return ret; @@ -667,7 +685,7 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data, break; } - postprocess_luma(frame.f, ctx->w, ctx->h, ctx->depth); + postprocess_luma(avctx, frame.f, ctx->w, ctx->h, ctx->depth); postprocess_chroma(frame.f, ctx->w >> 1, ctx->h >> 1, ctx->depth); *got_frame = 1;