From f07a8b13f38bbd52d55c52d1d6ea1265b13c7a84 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 21 Jan 2021 13:26:17 +0100 Subject: [PATCH] avfilter/vf_super2xsai: add slice threading support --- libavfilter/vf_super2xsai.c | 60 +++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/libavfilter/vf_super2xsai.c b/libavfilter/vf_super2xsai.c index 4233f02388..d6c5f44c17 100644 --- a/libavfilter/vf_super2xsai.c +++ b/libavfilter/vf_super2xsai.c @@ -46,6 +46,10 @@ typedef struct Super2xSaIContext { int is_be; } Super2xSaIContext; +typedef struct ThreadData { + AVFrame *in, *out; +} ThreadData; + #define GET_RESULT(A, B, C, D) ((A != C || A != D) - (B != C || B != D)) #define INTERPOLATE(A, B) (((A & hi_pixel_mask) >> 1) + ((B & hi_pixel_mask) >> 1) + (A & B & lo_pixel_mask)) @@ -53,12 +57,18 @@ typedef struct Super2xSaIContext { #define Q_INTERPOLATE(A, B, C, D) ((A & q_hi_pixel_mask) >> 2) + ((B & q_hi_pixel_mask) >> 2) + ((C & q_hi_pixel_mask) >> 2) + ((D & q_hi_pixel_mask) >> 2) \ + ((((A & q_lo_pixel_mask) + (B & q_lo_pixel_mask) + (C & q_lo_pixel_mask) + (D & q_lo_pixel_mask)) >> 2) & q_lo_pixel_mask) -static void super2xsai(AVFilterContext *ctx, - const uint8_t *src, int src_linesize, - uint8_t *dst, int dst_linesize, - int width, int height) +static int super2xsai(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { Super2xSaIContext *s = ctx->priv; + ThreadData *td = arg; + AVFrame *in = td->in; + AVFrame *out = td->out; + const uint8_t *src = in->data[0]; + uint8_t *dst = out->data[0]; + const int src_linesize = in->linesize[0]; + const int dst_linesize = out->linesize[0]; + const int width = in->width; + const int height = in->height; unsigned int x, y; uint32_t color[4][4]; const uint8_t *src_line[4]; @@ -67,18 +77,20 @@ static void super2xsai(AVFilterContext *ctx, const uint32_t lo_pixel_mask = s->lo_pixel_mask; const uint32_t q_hi_pixel_mask = s->q_hi_pixel_mask; const uint32_t q_lo_pixel_mask = s->q_lo_pixel_mask; + const int slice_start = (height * jobnr) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; /* Point to the first 4 lines, first line is duplicated */ - src_line[0] = src; - src_line[1] = src; - src_line[2] = src + src_linesize*FFMIN(1, height-1); - src_line[3] = src + src_linesize*FFMIN(2, height-1); + src_line[0] = src + src_linesize*FFMAX(slice_start - 1, 0); + src_line[1] = src + src_linesize*slice_start; + src_line[2] = src + src_linesize*FFMIN(slice_start + 1, height-1); + src_line[3] = src + src_linesize*FFMIN(slice_start + 2, height-1); #define READ_COLOR4(dst, src_line, off) dst = *((const uint32_t *)src_line + off) #define READ_COLOR3(dst, src_line, off) dst = AV_RL24 (src_line + 3*off) #define READ_COLOR2(dst, src_line, off) dst = s->is_be ? AV_RB16(src_line + 2 * off) : AV_RL16(src_line + 2 * off) - for (y = 0; y < height; y++) { + for (y = slice_start; y < slice_end; y++) { uint8_t *dst_line[2]; dst_line[0] = dst + dst_linesize*2*y; @@ -229,6 +241,8 @@ static void super2xsai(AVFilterContext *ctx, if (y < height - 3) src_line[3] += src_linesize; } // y loop + + return 0; } static int query_formats(AVFilterContext *ctx) @@ -305,24 +319,25 @@ static int config_output(AVFilterLink *outlink) return 0; } -static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref) +static int filter_frame(AVFilterLink *inlink, AVFrame *in) { - AVFilterLink *outlink = inlink->dst->outputs[0]; - AVFrame *outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!outpicref) { - av_frame_free(&inpicref); + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + ThreadData td; + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + av_frame_free(&in); return AVERROR(ENOMEM); } - av_frame_copy_props(outpicref, inpicref); - outpicref->width = outlink->w; - outpicref->height = outlink->h; + av_frame_copy_props(out, in); + out->width = outlink->w; + out->height = outlink->h; - super2xsai(inlink->dst, inpicref->data[0], inpicref->linesize[0], - outpicref->data[0], outpicref->linesize[0], - inlink->w, inlink->h); + td.in = in, td.out = out; + ctx->internal->execute(ctx, super2xsai, &td, NULL, FFMIN(in->height, ff_filter_get_nb_threads(ctx))); - av_frame_free(&inpicref); - return ff_filter_frame(outlink, outpicref); + av_frame_free(&in); + return ff_filter_frame(outlink, out); } static const AVFilterPad super2xsai_inputs[] = { @@ -351,4 +366,5 @@ AVFilter ff_vf_super2xsai = { .query_formats = query_formats, .inputs = super2xsai_inputs, .outputs = super2xsai_outputs, + .flags = AVFILTER_FLAG_SLICE_THREADS, };