/* Source: ffmpeg/libavfilter/vf_scale_vulkan.c (537 lines, 21 KiB, C) */

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan.h"
#include "scale_eval.h"
#include "internal.h"
#include "colorspace.h"
/* Compute workgroup dimensions (x, y, z). Passed to
 * ff_vk_set_compute_shader_sizes() when the shader is built, and used to
 * derive the number of workgroups dispatched per output frame. */
#define CGROUPS (int [3]){ 32, 32, 1 }
/* Scaling kernels selectable through the "scaler" option. */
enum ScalerFunc {
    F_BILINEAR = 0, /* sampled with VK_FILTER_LINEAR */
    F_NEAREST  = 1, /* sampled with VK_FILTER_NEAREST */
    F_NB       = 2, /* sentinel: equals the number of entries above */
};
/* Private context of the scale_vulkan filter. */
typedef struct ScaleVulkanContext {
/* Shared Vulkan filter state (device context, input/output formats,
 * output dimensions, queue selection). */
VulkanFilterContext vkctx;
/* Set to 1 once init_filter() has succeeded; cleared on uninit. */
int initialized;
/* Execution context created by ff_vk_create_exec_ctx(). */
FFVkExecContext *exec;
/* Compute pipeline created by ff_vk_create_pipeline(). */
VulkanPipeline *pl;
/* Storage buffer holding the RGB->YUV matrix; only created when the
 * output format differs from the input format. */
FFVkBuffer params_buf;
/* Shader updaters, must be in the main filter struct */
VkDescriptorImageInfo input_images[3];
VkDescriptorImageInfo output_images[3];
VkDescriptorBufferInfo params_desc;
/* Value of the "scaler" option. */
enum ScalerFunc scaler;
/* Value of the "format" option; NULL keeps the input format. */
char *out_format_string;
/* Value of the "out_range" option. */
enum AVColorRange out_range;
/* Width and height expressions from the "w" and "h" options. */
char *w_expr;
char *h_expr;
} ScaleVulkanContext;
/*
 * GLSL helper: sample plane `idx` of the input for output pixel `pos`.
 * The pixel centre (pos + 0.5) is normalized by the size of output plane
 * `idx`, scaled by `crop_range` and shifted by `crop_off`, then used as the
 * texture coordinate. The interpolation actually performed depends on the
 * sampler attached to input_img, so this helper serves both scaler modes.
 */
static const char scale_bilinear[] = {
C(0, vec4 scale_bilinear(int idx, ivec2 pos, vec2 crop_range, vec2 crop_off))
C(0, { )
C(1, vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]); )
C(1, npos *= crop_range; /* Reduce the range */ )
C(1, npos += crop_off; /* Offset the start */ )
C(1, return texture(input_img[idx], npos); )
C(0, } )
};
/*
 * GLSL helper: convert an RGB sample to YUV.
 * The sample is multiplied by `yuv_matrix` from the params buffer. With
 * fullrange == 1 only a 0.5 offset is added to the second and third
 * components; otherwise the components are scaled by 219/255 and 224/255
 * and offset by 16/255 and 128/255.
 */
static const char rgb2yuv[] = {
C(0, vec4 rgb2yuv(vec4 src, int fullrange) )
C(0, { )
C(1, src *= yuv_matrix; )
C(1, if (fullrange == 1) { )
C(2, src += vec4(0.0, 0.5, 0.5, 0.0); )
C(1, } else { )
C(2, src *= vec4(219.0 / 255.0, 224.0 / 255.0, 224.0 / 255.0, 1.0); )
C(2, src += vec4(16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0, 0.0); )
C(1, } )
C(1, return src; )
C(0, } )
};
/*
 * GLSL helper: store a converted sample into a two-plane output.
 * src.r goes to plane 0 at `pos`; src.g and src.b are stored together in
 * plane 1 at `pos / 2`.
 */
static const char write_nv12[] = {
C(0, void write_nv12(vec4 src, ivec2 pos) )
C(0, { )
C(1, imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); )
C(1, pos /= ivec2(2); )
C(1, imageStore(output_img[1], pos, vec4(src.g, src.b, 0.0, 0.0)); )
C(0, } )
};
/*
 * GLSL helper: store a converted sample into a three-plane output with
 * half-resolution second and third planes.
 * src.r goes to plane 0 at `pos`; src.g and src.b go to planes 1 and 2 at
 * `pos / 2`.
 */
static const char write_420[] = {
C(0, void write_420(vec4 src, ivec2 pos) )
C(0, { )
C(1, imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); )
C(1, pos /= ivec2(2); )
C(1, imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0)); )
C(1, imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0)); )
C(0, } )
};
/*
 * GLSL helper: store a converted sample into a three-plane output where
 * every plane is written at the same position.
 * src.r, src.g and src.b go to planes 0, 1 and 2 at `pos`.
 */
static const char write_444[] = {
C(0, void write_444(vec4 src, ivec2 pos) )
C(0, { )
C(1, imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); )
C(1, imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0)); )
C(1, imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0)); )
C(0, } )
};
/**
 * Build the compute pipeline used for scaling, using the first input frame
 * for its dimensions, cropping fields and colorspace.
 *
 * Creates the sampler matching the selected scaler, generates and compiles
 * the compute shader, initializes the pipeline and, when a format
 * conversion is requested, fills the parameter buffer with the RGB->YUV
 * matrix. On success s->initialized is set to 1.
 *
 * @param ctx filter context whose priv is a ScaleVulkanContext
 * @param in  first input frame
 * @return 0 on success, a negative AVERROR code otherwise
 */
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
    int err;
    VkSampler *sampler;
    VkFilter sampler_mode;
    ScaleVulkanContext *s = ctx->priv;

    /* Cropped region of the input, in pixels */
    int crop_x = in->crop_left;
    int crop_y = in->crop_top;
    int crop_w = in->width - (in->crop_left + in->crop_right);
    int crop_h = in->height - (in->crop_top + in->crop_bottom);

    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
    s->vkctx.queue_count = GET_QUEUE_COUNT(s->vkctx.hwctx, 0, 1, 0);
    s->vkctx.cur_queue_idx = av_get_random_seed() % s->vkctx.queue_count;

    switch (s->scaler) {
    case F_NEAREST:
        sampler_mode = VK_FILTER_NEAREST;
        break;
    case F_BILINEAR:
        sampler_mode = VK_FILTER_LINEAR;
        break;
    default:
        /* Without this branch sampler_mode would be used uninitialized
         * for any value that is not a real scaler (e.g. F_NB). */
        av_log(ctx, AV_LOG_ERROR, "Unsupported scaler function %d\n",
               (int)s->scaler);
        return AVERROR(EINVAL);
    }

    /* Create a sampler */
    sampler = ff_vk_init_sampler(ctx, 0, sampler_mode);
    if (!sampler)
        return AVERROR_EXTERNAL;

    s->pl = ff_vk_create_pipeline(ctx);
    if (!s->pl)
        return AVERROR(ENOMEM);

    { /* Create the shader */
        VulkanDescriptorSetBinding desc_i[2] = {
            {
                .name       = "input_img",
                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                .dimensions = 2,
                .elems      = av_pix_fmt_count_planes(s->vkctx.input_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                .updater    = s->input_images,
                .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
            },
            {
                .name       = "output_img",
                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
                .mem_quali  = "writeonly",
                .dimensions = 2,
                .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                .updater    = s->output_images,
            },
        };

        VulkanDescriptorSetBinding desc_b = {
            .name        = "params",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .mem_layout  = "std430",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .updater     = &s->params_desc,
            .buf_content = "mat4 yuv_matrix;",
        };

        SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "scale_compute",
                                             VK_SHADER_STAGE_COMPUTE_BIT);
        if (!shd)
            return AVERROR(ENOMEM);

        ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS);

        RET(ff_vk_add_descriptor_set(ctx, s->pl, shd,  desc_i, 2, 0)); /* set 0 */
        RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */

        GLSLD( scale_bilinear );

        /* The conversion helper is only emitted when it will be called */
        if (s->vkctx.output_format != s->vkctx.input_format) {
            GLSLD( rgb2yuv );
        }

        switch (s->vkctx.output_format) {
        case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
        case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
        case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
        default: break;
        }

        GLSLC(0, void main() );
        GLSLC(0, { );
        GLSLC(1, ivec2 size; );
        GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
        GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
        GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
        GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
        GLSLC(0, );

        if (s->vkctx.output_format == s->vkctx.input_format) {
            /* Same format: scale every plane independently */
            for (int i = 0; i < desc_i[1].elems; i++) {
                GLSLF(1, size = imageSize(output_img[%i]); ,i);
                GLSLC(1, if (IS_WITHIN(pos, size)) { );
                switch (s->scaler) {
                case F_NEAREST:
                case F_BILINEAR:
                    GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
                    GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
                    break;
                }
                GLSLC(1, } );
            }
        } else {
            /* Format conversion: scale plane 0, convert, then scatter */
            GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
            GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
            switch (s->vkctx.output_format) {
            case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
            case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
            case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
            default: return AVERROR(EINVAL);
            }
        }

        GLSLC(0, } );

        RET(ff_vk_compile_shader(ctx, shd, "main"));
    }

    RET(ff_vk_init_pipeline_layout(ctx, s->pl));
    RET(ff_vk_init_compute_pipeline(ctx, s->pl));

    if (s->vkctx.output_format != s->vkctx.input_format) {
        const struct LumaCoefficients *lcoeffs;
        double tmp_mat[3][3];

        struct {
            float yuv_matrix[4][4];
        } *par;

        lcoeffs = ff_get_luma_coefficients(in->colorspace);
        if (!lcoeffs) {
            av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
            return AVERROR(EINVAL);
        }

        err = ff_vk_create_buf(ctx, &s->params_buf,
                               sizeof(*par),
                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
        if (err)
            return err;

        err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0);
        if (err)
            return err;

        ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);

        /* Embed the 3x3 matrix in a zeroed 4x4 with a unit last diagonal */
        memset(par, 0, sizeof(*par));

        for (int y = 0; y < 3; y++)
            for (int x = 0; x < 3; x++)
                par->yuv_matrix[x][y] = tmp_mat[x][y];

        par->yuv_matrix[3][3] = 1.0;

        err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1);
        if (err)
            return err;

        s->params_desc.buffer = s->params_buf.buf;
        s->params_desc.range  = VK_WHOLE_SIZE;

        ff_vk_update_descriptor_set(ctx, s->pl, 1);
    }

    /* Execution context */
    RET(ff_vk_create_exec_ctx(ctx, &s->exec));

    s->initialized = 1;

    return 0;

fail:
    return err;
}
/**
 * Record and submit the scaling compute job for one frame pair.
 *
 * Creates an image view per input and output plane, refreshes descriptor
 * set 0, transitions all planes to the layouts the shader expects, binds
 * the pipeline and dispatches enough workgroups to cover the output.
 *
 * @param avctx filter context whose priv is a ScaleVulkanContext
 * @param out_f destination frame (data[0] holds an AVVkFrame)
 * @param in_f  source frame (data[0] holds an AVVkFrame)
 * @return the result of ff_vk_submit_exec_queue(), or the error of the
 *         failing helper
 */
static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
{
    int err = 0;
    ScaleVulkanContext *s = avctx->priv;
    AVVkFrame *src = (AVVkFrame *)in_f->data[0];
    AVVkFrame *dst = (AVVkFrame *)out_f->data[0];
    const int src_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
    const int dst_planes = av_pix_fmt_count_planes(s->vkctx.output_format);
    VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
    int nb_barriers = 0;
    VkCommandBuffer cmd_buf;
    int plane;

    /* Update descriptors and init the exec context */
    ff_vk_start_exec_recording(avctx, s->exec);
    cmd_buf = ff_vk_get_exec_buf(avctx, s->exec);

    for (plane = 0; plane < src_planes; plane++) {
        RET(ff_vk_create_imageview(avctx, s->exec,
                                   &s->input_images[plane].imageView,
                                   src->img[plane],
                                   av_vkfmt_from_pixfmt(s->vkctx.input_format)[plane],
                                   ff_comp_identity_map));

        s->input_images[plane].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    }

    for (plane = 0; plane < dst_planes; plane++) {
        RET(ff_vk_create_imageview(avctx, s->exec,
                                   &s->output_images[plane].imageView,
                                   dst->img[plane],
                                   av_vkfmt_from_pixfmt(s->vkctx.output_format)[plane],
                                   ff_comp_identity_map));

        s->output_images[plane].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
    }

    ff_vk_update_descriptor_set(avctx, s->pl, 0);

    /* Input planes: transition for shader reads */
    for (plane = 0; plane < src_planes; plane++) {
        barriers[nb_barriers++] = (VkImageMemoryBarrier) {
            .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask               = 0,
            .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
            .oldLayout                   = src->layout[plane],
            .newLayout                   = s->input_images[plane].imageLayout,
            .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
            .image                       = src->img[plane],
            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .subresourceRange.levelCount = 1,
            .subresourceRange.layerCount = 1,
        };

        /* Record the new state on the frame itself */
        src->layout[plane] = s->input_images[plane].imageLayout;
        src->access[plane] = VK_ACCESS_SHADER_READ_BIT;
    }

    /* Output planes: transition for shader writes */
    for (plane = 0; plane < dst_planes; plane++) {
        barriers[nb_barriers++] = (VkImageMemoryBarrier) {
            .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask               = 0,
            .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT,
            .oldLayout                   = dst->layout[plane],
            .newLayout                   = s->output_images[plane].imageLayout,
            .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
            .image                       = dst->img[plane],
            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .subresourceRange.levelCount = 1,
            .subresourceRange.layerCount = 1,
        };

        dst->layout[plane] = s->output_images[plane].imageLayout;
        dst->access[plane] = VK_ACCESS_SHADER_WRITE_BIT;
    }

    vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
                         0, NULL, 0, NULL, nb_barriers, barriers);

    ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl);

    /* One workgroup per CGROUPS-sized tile, rounded up */
    vkCmdDispatch(cmd_buf,
                  FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
                  FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);

    ff_vk_add_exec_dep(avctx, s->exec, in_f,  VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
    ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);

    return ff_vk_submit_exec_queue(avctx, s->exec);

fail:
    ff_vk_discard_exec_deps(avctx, s->exec);
    return err;
}
/**
 * Input pad callback: scale one frame and pass the result downstream.
 *
 * Allocates the output frame, initializes the pipeline on first use, runs
 * the compute job, copies frame properties and then overrides the colour
 * range and chroma location where applicable. The input frame is freed on
 * every path; the output frame is freed on failure.
 *
 * @param link input link the frame arrived on
 * @param in   input frame; ownership is taken
 * @return the result of ff_filter_frame(), or a negative AVERROR code
 */
static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
    AVFilterContext *avctx = link->dst;
    AVFilterLink *dst_link = avctx->outputs[0];
    ScaleVulkanContext *priv = avctx->priv;
    AVFrame *out;
    int err;

    out = ff_get_video_buffer(dst_link, dst_link->w, dst_link->h);
    if (!out) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* The pipeline is built lazily from the first frame */
    if (!priv->initialized)
        RET(init_filter(avctx, in));

    RET(process_frames(avctx, out, in));

    err = av_frame_copy_props(out, in);
    if (err < 0)
        goto fail;

    if (priv->out_range != AVCOL_RANGE_UNSPECIFIED)
        out->color_range = priv->out_range;

    /* Only set when a format conversion took place */
    if (priv->vkctx.output_format != priv->vkctx.input_format)
        out->chroma_location = AVCHROMA_LOC_TOPLEFT;

    av_frame_free(&in);

    return ff_filter_frame(dst_link, out);

fail:
    av_frame_free(&in);
    av_frame_free(&out);
    return err;
}
/**
 * Output pad configuration: evaluate the output size and pick the output
 * format.
 *
 * When a format is requested that differs from the input, the input must
 * pass ff_vk_mt_is_np_rgb() and the output must be NV12, YUV420P or
 * YUV444P. Changing the colour range is only accepted together with such
 * a conversion.
 *
 * @param outlink output link being configured
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int scale_vulkan_config_output(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    AVFilterLink *inlink = avctx->inputs[0];
    ScaleVulkanContext *s = avctx->priv;
    int ret;

    ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
                                   &s->vkctx.output_width,
                                   &s->vkctx.output_height);
    if (ret < 0)
        return ret;

    if (!s->out_format_string) {
        s->vkctx.output_format = s->vkctx.input_format;
    } else {
        s->vkctx.output_format = av_get_pix_fmt(s->out_format_string);
        if (s->vkctx.output_format == AV_PIX_FMT_NONE) {
            av_log(avctx, AV_LOG_ERROR, "Invalid output format.\n");
            return AVERROR(EINVAL);
        }
    }

    if (s->vkctx.output_format == s->vkctx.input_format) {
        /* No conversion shader, so the range cannot be altered */
        if (s->out_range != AVCOL_RANGE_UNSPECIFIED) {
            av_log(avctx, AV_LOG_ERROR, "Cannot change range without converting format\n");
            return AVERROR(EINVAL);
        }
    } else {
        if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) {
            av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n");
            return AVERROR(EINVAL);
        }

        switch (s->vkctx.output_format) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV444P:
            break;
        default:
            av_log(avctx, AV_LOG_ERROR, "Unsupported output format\n");
            return AVERROR(EINVAL);
        }
    }

    ret = ff_vk_filter_config_output(outlink);

    return ret < 0 ? ret : 0;
}
/*
 * Filter uninit callback: tear down the shared Vulkan filter state, free
 * the parameter buffer and clear the initialized flag.
 */
static void scale_vulkan_uninit(AVFilterContext *avctx)
{
ScaleVulkanContext *s = avctx->priv;
/* NOTE(review): the parameter buffer is freed after the shared Vulkan
 * state has been torn down; confirm ff_vk_free_buf() does not rely on
 * anything ff_vk_filter_uninit() releases. */
ff_vk_filter_uninit(avctx);
ff_vk_free_buf(avctx, &s->params_buf);
s->initialized = 0;
}
#define OFFSET(x) offsetof(ScaleVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption scale_vulkan_options[] = {
{ "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
{ "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
{ "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, "scaler" },
{ "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" },
{ "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, "scaler" },
{ "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
{ "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, "range" },
{ "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
{ "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
{ "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
{ "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
{ "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
{ "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
{ NULL },
};
AVFILTER_DEFINE_CLASS(scale_vulkan);
/* Single video input pad: frames are handed to scale_vulkan_filter_frame(). */
static const AVFilterPad scale_vulkan_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = ff_vk_filter_config_input,
        .filter_frame = scale_vulkan_filter_frame,
    },
};
/* Single video output pad, configured by scale_vulkan_config_output(). */
static const AVFilterPad scale_vulkan_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = scale_vulkan_config_output,
    },
};
const AVFilter ff_vf_scale_vulkan = {
.name = "scale_vulkan",
.description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"),
.priv_size = sizeof(ScaleVulkanContext),
.init = &ff_vk_filter_init,
.uninit = &scale_vulkan_uninit,
2021-08-12 11:05:31 +00:00
FILTER_INPUTS(scale_vulkan_inputs),
FILTER_OUTPUTS(scale_vulkan_outputs),
avfilter: Replace query_formats callback with union of list and callback If one looks at the many query_formats callbacks in existence, one will immediately recognize that there is one type of default callback for video and a slightly different default callback for audio: It is "return ff_set_common_formats_from_list(ctx, pix_fmts);" for video with a filter-specific pix_fmts list. For audio, it is the same with a filter-specific sample_fmts list together with ff_set_common_all_samplerates() and ff_set_common_all_channel_counts(). This commit allows to remove the boilerplate query_formats callbacks by replacing said callback with a union consisting the old callback and pointers for pixel and sample format arrays. For the not uncommon case in which these lists only contain a single entry (besides the sentinel) enum AVPixelFormat and enum AVSampleFormat fields are also added to the union to store them directly in the AVFilter, thereby avoiding a relocation. The state of said union will be contained in a new, dedicated AVFilter field (the nb_inputs and nb_outputs fields have been shrunk to uint8_t in order to create a hole for this new field; this is no problem, as the maximum of all the nb_inputs is four; for nb_outputs it is only two). The state's default value coincides with the earlier default of query_formats being unset, namely that the filter accepts all formats (and also sample rates and channel counts/layouts for audio) provided that these properties agree coincide for all inputs and outputs. By using different union members for audio and video filters the type-unsafety of using the same functions for audio and video lists will furthermore be more confined to formats.c than before. 
When the new fields are used, they will also avoid allocations: Currently something nearly equivalent to ff_default_query_formats() is called after every successful call to a query_formats callback; yet in the common case that the newly allocated AVFilterFormats are not used at all (namely if there are no free links) these newly allocated AVFilterFormats are freed again without ever being used. Filters no longer using the callback will not exhibit this any more. Reviewed-by: Paul B Mahol <onemda@gmail.com> Reviewed-by: Nicolas George <george@nsup.org> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2021-09-27 10:07:35 +00:00
FILTER_QUERY_FUNC(&ff_vk_filter_query_formats),
.priv_class = &scale_vulkan_class,
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};