Mirror of https://git.ffmpeg.org/ffmpeg.git, synced 2025-01-03 05:22:10 +00:00
avfilter: add libvmaf_cuda
Signed-off-by: Kyle Swanson <kswanson@netflix.com>
parent b23eaf968e
commit 7f685d0f49
configure

@@ -3833,6 +3833,7 @@ vflip_vulkan_filter_deps="vulkan spirv_compiler"
 vidstabdetect_filter_deps="libvidstab"
 vidstabtransform_filter_deps="libvidstab"
 libvmaf_filter_deps="libvmaf"
+libvmaf_cuda_filter_deps="libvmaf libvmaf_cuda ffnvcodec"
 zmq_filter_deps="libzmq"
 zoompan_filter_deps="swscale"
 zscale_filter_deps="libzimg const_nan"
@@ -6811,6 +6812,7 @@ enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uav
 enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
 enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
 enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
+enabled libvmaf && check_pkg_config libvmaf_cuda "libvmaf >= 2.0.0" libvmaf_cuda.h vmaf_cuda_state_init
 enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
 enabled libvorbis && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init &&
                      require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init
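The added check_pkg_config line only enables the CUDA code path when the installed libvmaf exposes libvmaf_cuda.h and vmaf_cuda_state_init. A rough pre-flight check, sketched below under the assumption that libvmaf was installed with its pkg-config metadata, shows whether configure is likely to find it:

# Sketch: confirm libvmaf >= 2.0.0 is visible to pkg-config, then inspect the
# exported include flags to see where libvmaf_cuda.h should live (paths vary by install prefix).
pkg-config --exists 'libvmaf >= 2.0.0' && echo "libvmaf >= 2.0.0 found"
pkg-config --cflags libvmaf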
doc/filters.texi

@@ -16928,6 +16928,32 @@ ffmpeg -i distorted.mpg -i reference.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STAR
 @end example
 @end itemize
 
+@section libvmaf_cuda
+
+This is the CUDA variant of the @ref{libvmaf} filter. It only accepts CUDA frames.
+
+It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
+After installing the library it can be enabled using:
+@code{./configure --enable-nonfree --enable-ffnvcodec --enable-libvmaf}.
+
+@subsection Examples
+@itemize
+
+@item
+Basic usage showing CUVID hardware decoding and CUDA scaling with @ref{scale_cuda}:
+@example
+ffmpeg \
+     -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i dis.obu \
+     -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i ref.obu \
+     -filter_complex "
+         [0:v]scale_cuda=format=yuv420p[ref]; \
+         [1:v]scale_cuda=format=yuv420p[dis]; \
+         [dis][ref]libvmaf_cuda=log_fmt=json:log_path=output.json
+     " \
+     -f null -
+@end example
+@end itemize
+
 @section limitdiff
 
 Apply limited difference filter using second and optionally third video stream.
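The documented example assumes hardware-decoded AV1 input. Since libvmaf_cuda only accepts CUDA frames, software-decoded sources first have to be uploaded to the GPU; a minimal sketch of that variant (the ref.mp4/dis.mp4 inputs and the device name "cu" are placeholders) could look like:

# Sketch: upload software-decoded frames with hwupload_cuda before libvmaf_cuda.
ffmpeg -init_hw_device cuda=cu -filter_hw_device cu \
     -i dis.mp4 -i ref.mp4 \
     -filter_complex "
         [0:v]hwupload_cuda,scale_cuda=format=yuv420p[dis]; \
         [1:v]hwupload_cuda,scale_cuda=format=yuv420p[ref]; \
         [dis][ref]libvmaf_cuda=log_fmt=json:log_path=output.json
     " \
     -f null -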
libavfilter/Makefile

@@ -363,6 +363,7 @@ OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o
 OBJS-$(CONFIG_LENSFUN_FILTER) += vf_lensfun.o
 OBJS-$(CONFIG_LIBPLACEBO_FILTER) += vf_libplacebo.o vulkan.o vulkan_filter.o
 OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o framesync.o
+OBJS-$(CONFIG_LIBVMAF_CUDA_FILTER) += vf_libvmaf.o framesync.o
 OBJS-$(CONFIG_LIMITDIFF_FILTER) += vf_limitdiff.o framesync.o
 OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o
 OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o
libavfilter/allfilters.c

@@ -339,6 +339,7 @@ extern const AVFilter ff_vf_lenscorrection;
 extern const AVFilter ff_vf_lensfun;
 extern const AVFilter ff_vf_libplacebo;
 extern const AVFilter ff_vf_libvmaf;
+extern const AVFilter ff_vf_libvmaf_cuda;
 extern const AVFilter ff_vf_limitdiff;
 extern const AVFilter ff_vf_limiter;
 extern const AVFilter ff_vf_loop;
libavfilter/vf_libvmaf.c

@@ -24,6 +24,8 @@
  * Calculate the VMAF between two input videos.
  */
 
+#include "config_components.h"
+
 #include <libvmaf.h>
 
 #include "libavutil/avstring.h"
@@ -36,6 +38,13 @@
 #include "internal.h"
 #include "video.h"
 
+#if CONFIG_LIBVMAF_CUDA_FILTER
+#include <libvmaf_cuda.h>
+
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#endif
+
 typedef struct LIBVMAFContext {
     const AVClass *class;
     FFFrameSync fs;
@@ -58,6 +67,9 @@ typedef struct LIBVMAFContext {
     unsigned model_cnt;
     unsigned frame_cnt;
     unsigned bpc;
+#if CONFIG_LIBVMAF_CUDA_FILTER
+    VmafCudaState *cu_state;
+#endif
 } LIBVMAFContext;
 
 #define OFFSET(x) offsetof(LIBVMAFContext, x)
@@ -717,3 +729,201 @@ const AVFilter ff_vf_libvmaf = {
     FILTER_OUTPUTS(libvmaf_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
 };
+
+#if CONFIG_LIBVMAF_CUDA_FILTER
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P16,
+};
+
+static int format_is_supported(enum AVPixelFormat fmt)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i] == fmt)
+            return 1;
+    return 0;
+}
+
+static int config_props_cuda(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *ctx = outlink->src;
+    LIBVMAFContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    CUcontext cu_ctx = device_hwctx->cuda_ctx;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
+
+    VmafConfiguration cfg = {
+        .log_level = log_level_map(av_log_get_level()),
+        .n_subsample = s->n_subsample,
+        .n_threads = s->n_threads,
+    };
+
+    VmafCudaPictureConfiguration cuda_pic_cfg = {
+        .pic_params = {
+            .bpc = desc->comp[0].depth,
+            .w = inlink->w,
+            .h = inlink->h,
+            .pix_fmt = pix_fmt_map(frames_ctx->sw_format),
+        },
+        .pic_prealloc_method = VMAF_CUDA_PICTURE_PREALLOCATION_METHOD_DEVICE,
+    };
+
+    VmafCudaConfiguration cuda_cfg = {
+        .cu_ctx = cu_ctx,
+    };
+
+    if (!format_is_supported(frames_ctx->sw_format)) {
+        av_log(s, AV_LOG_ERROR,
+               "Unsupported input format: %s\n", desc->name);
+        return AVERROR(EINVAL);
+    }
+
+    err = vmaf_init(&s->vmaf, cfg);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_state_init(&s->cu_state, cuda_cfg);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_import_state(s->vmaf, s->cu_state);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_preallocate_pictures(s->vmaf, cuda_pic_cfg);
+    if (err < 0)
+        return err;
+
+    err = parse_deprecated_options(ctx);
+    if (err)
+        return err;
+
+    err = parse_models(ctx);
+    if (err)
+        return err;
+
+    err = parse_features(ctx);
+    if (err)
+        return err;
+
+    return config_output(outlink);
+}
+
+static int copy_picture_data_cuda(VmafContext* vmaf,
+                                  AVCUDADeviceContext* device_hwctx,
+                                  AVFrame* src, VmafPicture* dst,
+                                  enum AVPixelFormat pix_fmt)
+{
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
+    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
+
+    CUDA_MEMCPY2D m = {
+        .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+        .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+    };
+
+    int err = vmaf_cuda_fetch_preallocated_picture(vmaf, dst);
+    if (err)
+        return AVERROR(ENOMEM);
+
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err)
+        return AVERROR_EXTERNAL;
+
+    for (unsigned i = 0; i < pix_desc->nb_components; i++) {
+        m.srcDevice = (CUdeviceptr) src->data[i];
+        m.srcPitch = src->linesize[i];
+        m.dstDevice = (CUdeviceptr) dst->data[i];
+        m.dstPitch = dst->stride[i];
+        m.WidthInBytes = dst->w[i] * ((dst->bpc + 7) / 8);
+        m.Height = dst->h[i];
+
+        err = cu->cuMemcpy2D(&m);
+        if (err)
+            return AVERROR_EXTERNAL;
+        break;
+    }
+
+    err = cu->cuCtxPopCurrent(NULL);
+    if (err)
+        return AVERROR_EXTERNAL;
+
+    return 0;
+}
+
+static int do_vmaf_cuda(FFFrameSync* fs)
+{
+    AVFilterContext* ctx = fs->parent;
+    LIBVMAFContext* s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    VmafPicture pic_ref, pic_dist;
+    AVFrame *ref, *dist;
+
+    int err = 0;
+
+    err = ff_framesync_dualinput_get(fs, &dist, &ref);
+    if (err < 0)
+        return err;
+    if (ctx->is_disabled || !ref)
+        return ff_filter_frame(ctx->outputs[0], dist);
+
+    err = copy_picture_data_cuda(s->vmaf, device_hwctx, ref, &pic_ref,
+                                 frames_ctx->sw_format);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    err = copy_picture_data_cuda(s->vmaf, device_hwctx, dist, &pic_dist,
+                                 frames_ctx->sw_format);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return ff_filter_frame(ctx->outputs[0], dist);
+}
+
+static av_cold int init_cuda(AVFilterContext *ctx)
+{
+    LIBVMAFContext *s = ctx->priv;
+    s->fs.on_event = do_vmaf_cuda;
+    return 0;
+}
+
+static const AVFilterPad libvmaf_outputs_cuda[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_props_cuda,
+    },
+};
+
+const AVFilter ff_vf_libvmaf_cuda = {
+    .name           = "libvmaf_cuda",
+    .description    = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
+    .preinit        = libvmaf_framesync_preinit,
+    .init           = init_cuda,
+    .uninit         = uninit,
+    .activate       = activate,
+    .priv_size      = sizeof(LIBVMAFContext),
+    .priv_class     = &libvmaf_class,
+    FILTER_INPUTS(libvmaf_inputs),
+    FILTER_OUTPUTS(libvmaf_outputs_cuda),
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+#endif