avfilter/vf_libplacebo: add flexible crop exprs

Motivated by a desire to use vf_libplacebo as a GPU-accelerated cropping/padding/zooming filter. This commit adds support for setting the `input/target.crop` fields as dynamic expressions. Re-use the same generic variables available to other scale and crop type filters, and also add some more that we can afford as a result of being able to set these properties dynamically. It's worth pointing out that `out_t/ot` is currently redundant with `in_t/t` since it will always contain the same PTS values, but I plan on changing this in the near future. I decided to also expose `crop_w/crop_h` and `pos_w/pos_h` as variables in the expression parser itself, since this enables the fairly common use case of determining dimensions first and then placing the image appropriately, such as is done in the default behavior (which centers the cropped/placed region by default).
2023-05-01 16:35:22 +02:00 · 2023-05-01 16:35:22 +02:00 · b80e43bf07
parent 4b11a07550
commit b80e43bf07
2 changed files with 192 additions and 8 deletions
--- a/doc/filters.texi
+++ b/doc/filters.texi
@ -15997,10 +15997,31 @@ in source frames.
@table @option
@item w
@item h
-Set the output video dimension expression. Default value is the input dimension.
+Set the output video dimension expression. Default values are @code{iw} and
+@code{ih}.

 Allows for the same expressions as the @ref{scale} filter.

+@item crop_x
+@item crop_y
+Set the input crop x/y expressions, default values are @code{(iw-cw)/2} and
+@code{(ih-ch)/2}.
+
+@item crop_w
+@item crop_h
+Set the input crop width/height expressions, default values are @code{iw} and
+@code{ih}.
+
+@item pos_x
+@item pos_y
+Set the output placement x/y expressions, default values are @code{(ow-pw)/2}
+and @code{(oh-ph)/2}.
+
+@item pos_w
+@item pos_h
+Set the output placement width/height expressions, default values are @code{ow}
+and @code{oh}.
+
@item format
 Set the output format override. If unset (the default), frames will be output
 in the same format as the respective input frames. Otherwise, format conversion
@ -16012,9 +16033,9 @@ Work the same as the identical @ref{scale} filter options.

@item normalize_sar
 If enabled, output frames will always have a pixel aspect ratio of 1:1. This
-will introduce padding/cropping as necessary. If disabled (the default), any
-aspect ratio mismatches, including those from e.g. anamorphic video sources,
-are forwarded to the output pixel aspect ratio.
+will introduce additional padding/cropping as necessary. If disabled (the
+default), any aspect ratio mismatches, including those from e.g. anamorphic
+video sources, are forwarded to the output pixel aspect ratio.

@item pad_crop_ratio
 Specifies a ratio (between @code{0.0} and @code{1.0}) between padding and
@ -16026,7 +16047,7 @@ approaches.

@item fillcolor
 Set the color used to fill the output area not covered by the output image, for
-example as a result of @ref{normalize_sar}. For the general syntax of this
+example as a result of @option{normalize_sar}. For the general syntax of this
 option, check the @ref{color syntax,,"Color" section in the ffmpeg-utils
 manual,ffmpeg-utils}. Defaults to @code{black}.

@ -16051,6 +16072,30 @@ BT.2020+PQ, overriding the usual input frame metadata. These will also be
 picked as the values of @code{auto} for the respective frame output options.
@end table

+In addition to the expression constants documented for the @ref{scale} filter,
+the @option{crop_w}, @option{crop_h}, @option{crop_x}, @option{crop_y},
+@option{pos_w}, @option{pos_h}, @option{pos_x} and @option{pos_y} options can
+also contain the following constants:
+
+@table @option
+@item crop_w, cw
+@item crop_h, ch
+The computed values of @option{crop_w} and @option{crop_h}.
+
+@item pos_w, pw
+@item pos_h, ph
+The computed values of @option{pos_w} and @option{pos_h}.
+
+@item in_t, t
+The input frame timestamp, in seconds. NAN if input timestamp is unknown.
+
+@item out_t, ot
+The input frame timestamp, in seconds. NAN if input timestamp is unknown.
+
+@item n
+The input frame number, starting with 0.
+@end table
+
@subsubsection Scaling
 The options in this section control how libplacebo performs upscaling and (if
 necessary) downscaling. Note that libplacebo will always internally operate on
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@ -16,6 +16,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/eval.h"
 #include "libavutil/file.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
@ -60,6 +61,50 @@ static const struct pl_tone_map_function * const tonemapping_funcs[TONE_MAP_COUN
    [TONE_MAP_LINEAR]    = &pl_tone_map_linear,
 };

+static const char *const var_names[] = {
+    "in_w", "iw",   ///< width  of the input video frame
+    "in_h", "ih",   ///< height of the input video frame
+    "out_w", "ow",  ///< width  of the output video frame
+    "out_h", "oh",  ///< height of the output video frame
+    "crop_w", "cw", ///< evaluated input crop width
+    "crop_h", "ch", ///< evaluated input crop height
+    "pos_w", "pw",  ///< evaluated output placement width
+    "pos_h", "ph",  ///< evaluated output placement height
+    "a",            ///< iw/ih
+    "sar",          ///< input pixel aspect ratio
+    "dar",          ///< output pixel aspect ratio
+    "hsub",         ///< input horizontal subsampling factor
+    "vsub",         ///< input vertical subsampling factor
+    "ohsub",        ///< output horizontal subsampling factor
+    "ovsub",        ///< output vertical subsampling factor
+    "in_t", "t",    ///< input frame pts
+    "out_t", "ot",  ///< output frame pts
+    "n",            ///< number of frame
+    NULL,
+};
+
+enum var_name {
+    VAR_IN_W,   VAR_IW,
+    VAR_IN_H,   VAR_IH,
+    VAR_OUT_W,  VAR_OW,
+    VAR_OUT_H,  VAR_OH,
+    VAR_CROP_W, VAR_CW,
+    VAR_CROP_H, VAR_CH,
+    VAR_POS_W,  VAR_PW,
+    VAR_POS_H,  VAR_PH,
+    VAR_A,
+    VAR_SAR,
+    VAR_DAR,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_OHSUB,
+    VAR_OVSUB,
+    VAR_IN_T,   VAR_T,
+    VAR_OUT_T,  VAR_OT,
+    VAR_N,
+    VAR_VARS_NB
+};
+
 typedef struct LibplaceboContext {
    /* lavfi vulkan*/
    FFVulkanContext vkctx;
@ -75,8 +120,16 @@ typedef struct LibplaceboContext {
    char *out_format_string;
    enum AVPixelFormat out_format;
    char *fillcolor;
+    double var_values[VAR_VARS_NB];
    char *w_expr;
    char *h_expr;
+    char *crop_x_expr, *crop_y_expr;
+    char *crop_w_expr, *crop_h_expr;
+    char *pos_x_expr, *pos_y_expr;
+    char *pos_w_expr, *pos_h_expr;
+    // Parsed expressions for input/output crop
+    AVExpr *crop_x_pexpr, *crop_y_pexpr, *crop_w_pexpr, *crop_h_pexpr;
+    AVExpr *pos_x_pexpr, *pos_y_pexpr, *pos_w_pexpr, *pos_h_pexpr;
    AVRational target_sar;
    float pad_crop_ratio;
    int force_original_aspect_ratio;
@ -249,6 +302,7 @@ static void libplacebo_uninit(AVFilterContext *avctx);

 static int libplacebo_init(AVFilterContext *avctx)
 {
+    int err = 0;
    LibplaceboContext *s = avctx->priv;

    /* Create libplacebo log context */
@ -273,8 +327,28 @@ static int libplacebo_init(AVFilterContext *avctx)
        s->out_format = AV_PIX_FMT_NONE;
    }

+    RET(av_expr_parse(&s->crop_x_pexpr, s->crop_x_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->crop_y_pexpr, s->crop_y_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->crop_w_pexpr, s->crop_w_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->crop_h_pexpr, s->crop_h_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->pos_x_pexpr, s->pos_x_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->pos_y_pexpr, s->pos_y_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->pos_w_pexpr, s->pos_w_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+    RET(av_expr_parse(&s->pos_h_pexpr, s->pos_h_expr, var_names,
+                      NULL, NULL, NULL, NULL, 0, s));
+
    /* Note: s->vulkan etc. are initialized later, when hwctx is available */
    return 0;
+
+fail:
+    return err;
 }

 static int init_vulkan(AVFilterContext *avctx)
@ -364,6 +438,15 @@ static void libplacebo_uninit(AVFilterContext *avctx)
    pl_log_destroy(&s->log);
    ff_vk_uninit(&s->vkctx);
    s->gpu = NULL;
+
+    av_expr_free(s->crop_x_pexpr);
+    av_expr_free(s->crop_y_pexpr);
+    av_expr_free(s->crop_w_pexpr);
+    av_expr_free(s->crop_h_pexpr);
+    av_expr_free(s->pos_x_pexpr);
+    av_expr_free(s->pos_y_pexpr);
+    av_expr_free(s->pos_w_pexpr);
+    av_expr_free(s->pos_h_pexpr);
 }

 static int process_frames(AVFilterContext *avctx, AVFrame *out, AVFrame *in)
@ -398,6 +481,25 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out, AVFrame *in)
    if (!s->apply_filmgrain)
        image.film_grain.type = PL_FILM_GRAIN_NONE;

+    s->var_values[VAR_CROP_W] = s->var_values[VAR_CW] =
+        av_expr_eval(s->crop_w_pexpr, s->var_values, NULL);
+    s->var_values[VAR_CROP_H] = s->var_values[VAR_CH] =
+        av_expr_eval(s->crop_h_pexpr, s->var_values, NULL);
+    s->var_values[VAR_POS_W]  = s->var_values[VAR_PW] =
+        av_expr_eval(s->pos_w_pexpr, s->var_values, NULL);
+    s->var_values[VAR_POS_H]  = s->var_values[VAR_PH] =
+        av_expr_eval(s->pos_h_pexpr, s->var_values, NULL);
+
+    image.crop.x0 = av_expr_eval(s->crop_x_pexpr, s->var_values, NULL);
+    image.crop.y0 = av_expr_eval(s->crop_y_pexpr, s->var_values, NULL);
+    image.crop.x1 = image.crop.x0 + s->var_values[VAR_CROP_W];
+    image.crop.y1 = image.crop.y0 + s->var_values[VAR_CROP_H];
+
+    target.crop.x0 = av_expr_eval(s->pos_x_pexpr, s->var_values, NULL);
+    target.crop.y0 = av_expr_eval(s->pos_y_pexpr, s->var_values, NULL);
+    target.crop.x1 = target.crop.x0 + s->var_values[VAR_POS_W];
+    target.crop.y1 = target.crop.y0 + s->var_values[VAR_POS_H];
+
    if (s->target_sar.num) {
        float aspect = pl_rect2df_aspect(&target.crop) * av_q2d(s->target_sar);
        pl_rect2df_aspect_set(&target.crop, aspect, s->pad_crop_ratio);
@ -530,6 +632,18 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
    out->width = outlink->w;
    out->height = outlink->h;

+    /* Dynamic variables */
+    s->var_values[VAR_IN_T]   = s->var_values[VAR_T] =
+        in->pts == AV_NOPTS_VALUE ? NAN : in->pts * av_q2d(link->time_base);
+    s->var_values[VAR_OUT_T]  = s->var_values[VAR_OT] =
+        out->pts == AV_NOPTS_VALUE ? NAN : out->pts * av_q2d(outlink->time_base);
+    s->var_values[VAR_N]      = link->frame_count_out;
+    /* Will be evaluated/set by `process_frames` */
+    s->var_values[VAR_CROP_W] = s->var_values[VAR_CW] = NAN;
+    s->var_values[VAR_CROP_H] = s->var_values[VAR_CH] = NAN;
+    s->var_values[VAR_POS_W]  = s->var_values[VAR_PW] = NAN;
+    s->var_values[VAR_POS_H]  = s->var_values[VAR_PH] = NAN;
+
    if (s->apply_dovi && av_frame_get_side_data(in, AV_FRAME_DATA_DOVI_METADATA)) {
        /* Output of dovi reshaping is always BT.2020+PQ, so infer the correct
         * output colorspace defaults */
@ -660,6 +774,8 @@ static int libplacebo_config_output(AVFilterLink *outlink)
    AVFilterContext *avctx = outlink->src;
    LibplaceboContext *s   = avctx->priv;
    AVFilterLink *inlink   = outlink->src->inputs[0];
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const AVPixFmtDescriptor *out_desc = av_pix_fmt_desc_get(outlink->format);
    AVHWFramesContext *hwfc;
    AVVulkanFramesContext *vkfc;
    AVRational scale_sar;
@ -687,6 +803,21 @@ static int libplacebo_config_output(AVFilterLink *outlink)
            outlink->sample_aspect_ratio = scale_sar;
    }

+    /* Static variables */
+    s->var_values[VAR_IN_W]     = s->var_values[VAR_IW] = inlink->w;
+    s->var_values[VAR_IN_H]     = s->var_values[VAR_IH] = inlink->h;
+    s->var_values[VAR_OUT_W]    = s->var_values[VAR_OW] = outlink->w;
+    s->var_values[VAR_OUT_H]    = s->var_values[VAR_OH] = outlink->h;
+    s->var_values[VAR_A]        = (double) inlink->w / inlink->h;
+    s->var_values[VAR_SAR]      = inlink->sample_aspect_ratio.num ?
+        av_q2d(inlink->sample_aspect_ratio) : 1.0;
+    s->var_values[VAR_DAR]      = outlink->sample_aspect_ratio.num ?
+        av_q2d(outlink->sample_aspect_ratio) : 1.0;
+    s->var_values[VAR_HSUB]     = 1 << desc->log2_chroma_w;
+    s->var_values[VAR_VSUB]     = 1 << desc->log2_chroma_h;
+    s->var_values[VAR_OHSUB]    = 1 << out_desc->log2_chroma_w;
+    s->var_values[VAR_OVSUB]    = 1 << out_desc->log2_chroma_h;
+
    if (outlink->format != AV_PIX_FMT_VULKAN)
        return 0;

@ -714,15 +845,23 @@ fail:
 #define DYNAMIC (STATIC | AV_OPT_FLAG_RUNTIME_PARAM)

 static const AVOption libplacebo_options[] = {
-    { "w", "Output video width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = STATIC },
-    { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = STATIC },
+    { "w", "Output video frame width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = STATIC },
+    { "h", "Output video frame height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = STATIC },
+    { "crop_x", "Input video crop x", OFFSET(crop_x_expr), AV_OPT_TYPE_STRING, {.str = "(iw-cw)/2"}, .flags = DYNAMIC },
+    { "crop_y", "Input video crop y", OFFSET(crop_y_expr), AV_OPT_TYPE_STRING, {.str = "(ih-ch)/2"}, .flags = DYNAMIC },
+    { "crop_w", "Input video crop w", OFFSET(crop_w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = DYNAMIC },
+    { "crop_h", "Input video crop h", OFFSET(crop_h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = DYNAMIC },
+    { "pos_x", "Output video placement x", OFFSET(pos_x_expr), AV_OPT_TYPE_STRING, {.str = "(ow-pw)/2"}, .flags = DYNAMIC },
+    { "pos_y", "Output video placement y", OFFSET(pos_y_expr), AV_OPT_TYPE_STRING, {.str = "(oh-ph)/2"}, .flags = DYNAMIC },
+    { "pos_w", "Output video placement w", OFFSET(pos_w_expr), AV_OPT_TYPE_STRING, {.str = "ow"}, .flags = DYNAMIC },
+    { "pos_h", "Output video placement h", OFFSET(pos_h_expr), AV_OPT_TYPE_STRING, {.str = "oh"}, .flags = DYNAMIC },
    { "format", "Output video format", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = STATIC },
    { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, STATIC, "force_oar" },
        { "disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, STATIC, "force_oar" },
        { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, STATIC, "force_oar" },
        { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, STATIC, "force_oar" },
    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, STATIC },
-    { "normalize_sar", "force SAR normalization to 1:1", OFFSET(normalize_sar), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, STATIC },
+    { "normalize_sar", "force SAR normalization to 1:1 by adjusting pos_x/y/w/h", OFFSET(normalize_sar), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, STATIC },
    { "pad_crop_ratio", "ratio between padding and cropping when normalizing SAR (0=pad, 1=crop)", OFFSET(pad_crop_ratio), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 1.0, DYNAMIC },
    { "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_STRING, {.str = "black"}, .flags = DYNAMIC },