ffmpeg/libswscale/graph.c
Niklas Haas 79452d382f swscale/graph: fix memleak of cascaded graphs
Just free them directly and discard the parent context.

Fixes: bf738412e8
Signed-off-by: Niklas Haas <git@haasn.dev>
Sponsored-by: Sovereign Tech Fund
2024-12-04 11:38:30 +01:00

620 lines
20 KiB
C

/*
* Copyright (C) 2024 Niklas Haas
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/avassert.h"
#include "libavutil/error.h"
#include "libavutil/imgutils.h"
#include "libavutil/macros.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/slicethread.h"
#include "libswscale/swscale.h"
#include "libswscale/utils.h"
#include "swscale_internal.h"
#include "graph.h"
/**
 * Lazily allocate the intermediate output image of a pass.
 *
 * Nothing is done (and 0 is returned) when `pass` is NULL or when its output
 * format is already something other than AV_PIX_FMT_NONE.
 *
 * The buffer height is num_slices * slice_h rather than the pass height, so
 * every slice is backed by a full slice worth of lines.
 *
 * @return 0 when there is nothing to do, otherwise the result of
 *         av_image_alloc()
 */
static int pass_alloc_output(SwsPass *pass)
{
    SwsImg *img;
    int buf_height;

    if (!pass)
        return 0;

    img = &pass->output;
    if (img->fmt != AV_PIX_FMT_NONE)
        return 0;

    /* The image is tagged with its format before the allocation is attempted */
    img->fmt   = pass->format;
    buf_height = pass->num_slices * pass->slice_h;
    return av_image_alloc(img->data, img->linesize, pass->width, buf_height,
                          pass->format, 64);
}
/**
 * Allocate a new pass and register it at the end of the graph's pass list.
 *
 * If `input` is non-NULL, its intermediate output buffer is allocated on
 * demand so that the new pass has something to read from.
 *
 * @param slice_align should be a power of two, or 0 to disable slice
 *                    threading (the pass then runs as one full-height slice)
 * @return the new pass, or NULL if any allocation failed
 */
static SwsPass *pass_add(SwsGraph *graph, void *priv, enum AVPixelFormat fmt,
                         int w, int h, SwsPass *input, int slice_align,
                         sws_filter_run_t run)
{
    SwsPass *node = av_mallocz(sizeof(*node));
    if (!node)
        return NULL;

    node->graph      = graph;
    node->run        = run;
    node->priv       = priv;
    node->format     = fmt;
    node->width      = w;
    node->height     = h;
    node->input      = input;
    node->output.fmt = AV_PIX_FMT_NONE;

    if (pass_alloc_output(input) < 0) {
        av_free(node);
        return NULL;
    }

    if (slice_align) {
        /* Split the height evenly across threads, rounded up to the alignment */
        const int threads    = graph->num_threads;
        const int per_thread = (node->height + threads - 1) / threads;

        node->slice_h    = FFALIGN(per_thread, slice_align);
        node->num_slices = (node->height + node->slice_h - 1) / node->slice_h;
    } else {
        node->slice_h    = node->height;
        node->num_slices = 1;
    }

    /* On failure the list is left alone; drop the pass and report NULL */
    if (av_dynarray_add_nofree(&graph->passes, &graph->num_passes, node) < 0)
        av_freep(&node);

    return node;
}
/**
 * Wrapper around pass_add() that chains a pass "in-place": the pass currently
 * stored in `*pass` becomes the input of the new pass, and `*pass` is then
 * updated to point at the new pass.
 *
 * @return 0 on success, AVERROR(ENOMEM) if pass_add() failed (in which case
 *         `*pass` is left untouched)
 */
static int pass_append(SwsGraph *graph, void *priv, enum AVPixelFormat fmt,
                       int w, int h, SwsPass **pass, int slice_align,
                       sws_filter_run_t run)
{
    SwsPass *const chained = pass_add(graph, priv, fmt, w, h, *pass,
                                      slice_align, run);
    if (chained) {
        *pass = chained;
        return 0;
    }

    return AVERROR(ENOMEM);
}
/**
 * Vertical shift to apply to line counts/offsets of a given plane:
 * log2_chroma_h of the format for planes 1 and 2, and 0 for any other plane.
 */
static int vshift(enum AVPixelFormat fmt, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

    if (plane != 1 && plane != 2)
        return 0;

    return desc->log2_chroma_h;
}
/**
 * Return a copy of `img_base` whose plane pointers are advanced vertically by
 * y lines (scaled down by the per-plane vertical shift). Planes are processed
 * in order until the first NULL data pointer.
 */
static SwsImg shift_img(const SwsImg *img_base, int y)
{
    SwsImg shifted = *img_base;

    for (int plane = 0; plane < 4; plane++) {
        if (!shifted.data[plane])
            break;
        shifted.data[plane] += (y >> vshift(shifted.fmt, plane)) *
                               shifted.linesize[plane];
    }

    return shifted;
}
/**
 * Pass callback that copies `h` lines, starting at line `y`, from the input
 * image to the output image, plane by plane.
 *
 * Planes are processed until the first NULL input data pointer.
 *
 *  - If input and output refer to the very same memory with the same stride,
 *    the plane is skipped: there is nothing to copy, and calling memcpy() on
 *    overlapping regions is undefined behavior.
 *  - If both strides are equal and non-negative, the whole slice is copied
 *    with a single memcpy().
 *  - Otherwise the plane is copied row by row, using the smaller of the two
 *    stride magnitudes as the row length. This also covers negative strides,
 *    for which a byte count derived directly from the stride would be
 *    negative and turn into a huge size_t.
 */
static void run_copy(const SwsImg *out_base, const SwsImg *in_base,
                     int y, int h, const SwsPass *pass)
{
    SwsImg in  = shift_img(in_base,  y);
    SwsImg out = shift_img(out_base, y);

    for (int i = 0; i < FF_ARRAY_ELEMS(in.data) && in.data[i]; i++) {
        const int lines      = h >> vshift(in.fmt, i);
        const int in_stride  = in.linesize[i];
        const int out_stride = out.linesize[i];

        /* In-place operation: source and destination are identical */
        if (in.data[i] == out.data[i] && in_stride == out_stride)
            continue;

        if (in_stride == out_stride && in_stride >= 0) {
            memcpy(out.data[i], in.data[i], (size_t) lines * out_stride);
        } else {
            /* Row length is the smaller stride magnitude */
            const int in_len  = in_stride  < 0 ? -in_stride  : in_stride;
            const int out_len = out_stride < 0 ? -out_stride : out_stride;
            const int row_len = FFMIN(out_len, in_len);

            for (int j = 0; j < lines; j++) {
                memcpy(out.data[i], in.data[i], row_len);
                in.data[i]  += in_stride;
                out.data[i] += out_stride;
            }
        }
    }
}
/**
 * Pass callback that copies `h` rows of plane 0, starting at row `y`, and
 * then overwrites one byte of every 4-byte group with 0xFF.
 *
 * Each row is 4 * pass->width bytes long; the first overwritten byte sits at
 * offset (src0Alpha - 1) within the row, taken from the SwsInternal stored in
 * pass->priv.
 */
static void run_rgb0(const SwsImg *out, const SwsImg *in, int y, int h,
                     const SwsPass *pass)
{
    const SwsInternal *c   = pass->priv;
    const int row_bytes    = 4 * pass->width;
    const int first_opaque = c->src0Alpha - 1;
    const int in_stride    = in->linesize[0];
    const int out_stride   = out->linesize[0];
    const uint8_t *src_row = in->data[0]  + y * in_stride;
    uint8_t *dst_row       = out->data[0] + y * out_stride;

    for (int row = 0; row < h; row++) {
        memcpy(dst_row, src_row, row_bytes * sizeof(*dst_row));
        for (int x = first_opaque; x < row_bytes; x += 4)
            dst_row[x] = 0xFF;

        src_row += in_stride;
        dst_row += out_stride;
    }
}
/**
 * Pass callback forwarding a slice of plane 0 to ff_xyz12Torgb48().
 * Both the source and destination pointers are advanced to line `y`;
 * pass->priv is handed through as the conversion context.
 */
static void run_xyz2rgb(const SwsImg *out, const SwsImg *in, int y, int h,
                        const SwsPass *pass)
{
    const int dst_stride = out->linesize[0];
    const int src_stride = in->linesize[0];
    uint8_t *dst         = out->data[0] + y * dst_stride;
    const uint8_t *src   = in->data[0]  + y * src_stride;

    ff_xyz12Torgb48(pass->priv, dst, dst_stride, src, src_stride,
                    pass->width, h);
}
/**
 * Pass callback forwarding a slice of plane 0 to ff_rgb48Toxyz12().
 * Both the source and destination pointers are advanced to line `y`;
 * pass->priv is handed through as the conversion context.
 */
static void run_rgb2xyz(const SwsImg *out, const SwsImg *in, int y, int h,
                        const SwsPass *pass)
{
    const int dst_stride = out->linesize[0];
    const int src_stride = in->linesize[0];
    uint8_t *dst         = out->data[0] + y * dst_stride;
    const uint8_t *src   = in->data[0]  + y * src_stride;

    ff_rgb48Toxyz12(pass->priv, dst, dst_stride, src, src_stride,
                    pass->width, h);
}
/***********************************************************************
* Internal ff_swscale() wrapper. This re-uses the legacy scaling API. *
* This is considered fully deprecated, and will be replaced by a full *
* reimplementation ASAP. *
***********************************************************************/
/**
 * Pass `free` callback: `priv` is the legacy SwsContext attached to the pass,
 * which is released with sws_free_context().
 */
static void free_legacy_swscale(void *priv)
{
    SwsContext *legacy = priv;

    sws_free_context(&legacy);
}
/**
 * Pass `setup` callback for legacy scaler passes, run before the slices of
 * the pass are executed.
 *
 *  - With SWS_BITEXACT and error diffusion dithering, the four dither error
 *    rows (dst_w + 2 entries each) are cleared, provided they exist.
 *  - For palette source formats, the palette is refreshed from in->data[1].
 */
static void setup_legacy_swscale(const SwsImg *out, const SwsImg *in,
                                 const SwsPass *pass)
{
    SwsContext *sws = pass->priv;
    SwsInternal *c  = sws_internal(sws);
    const int bitexact_ed = (sws->flags & SWS_BITEXACT) &&
                            sws->dither == SWS_DITHER_ED;

    if (bitexact_ed && c->dither_error[0]) {
        const size_t row_size = sizeof(c->dither_error[0][0]) * (sws->dst_w + 2);

        for (int row = 0; row < 4; row++)
            memset(c->dither_error[row], 0, row_size);
    }

    if (usePal(sws->src_format))
        ff_update_palette(c, (const uint32_t *) in->data[1]);
}
/**
 * Select the legacy context responsible for the slice starting at line `y`.
 *
 * For single-slice passes this is the context stored in pass->priv itself.
 * Otherwise it is the matching entry of the parent's slice_ctx array; for
 * palette source formats the parent's pal_yuv / pal_rgb tables are copied
 * into that slice context first.
 */
static inline SwsContext *slice_ctx(const SwsPass *pass, int y)
{
    SwsContext *const main_ctx = pass->priv;
    SwsInternal *const parent  = sws_internal(main_ctx);
    SwsContext *slice;

    if (pass->num_slices == 1)
        return main_ctx;

    av_assert1(parent->nb_slice_ctx == pass->num_slices);
    slice = parent->slice_ctx[y / pass->slice_h];

    if (!usePal(slice->src_format))
        return slice;

    {
        SwsInternal *sub = sws_internal(slice);

        memcpy(sub->pal_yuv, parent->pal_yuv, sizeof(sub->pal_yuv));
        memcpy(sub->pal_rgb, parent->pal_rgb, sizeof(sub->pal_rgb));
    }

    return slice;
}
/**
 * Pass callback running the legacy context's convert_unscaled() function on
 * one slice. The input image is advanced to line `y`, while the output image
 * is passed unshifted together with `y` and `h`.
 */
static void run_legacy_unscaled(const SwsImg *out, const SwsImg *in_base,
                                int y, int h, const SwsPass *pass)
{
    SwsInternal *c   = sws_internal(slice_ctx(pass, y));
    const SwsImg src = shift_img(in_base, y);
    const uint8_t *const *src_planes = (const uint8_t *const *) src.data;

    c->convert_unscaled(c, src_planes, src.linesize, y, h,
                        out->data, out->linesize);
}
/**
 * Pass callback running ff_swscale() for one output slice. The complete
 * input image (lines 0 .. src_h) is handed over unshifted, while the output
 * image is advanced to line `y`; `y` and `h` are passed along as well.
 */
static void run_legacy_swscale(const SwsImg *out_base, const SwsImg *in,
                               int y, int h, const SwsPass *pass)
{
    SwsContext *slice = slice_ctx(pass, y);
    SwsInternal *c    = sws_internal(slice);
    const SwsImg dst  = shift_img(out_base, y);
    const uint8_t *const *src_planes = (const uint8_t *const *) in->data;

    ff_swscale(c, src_planes, in->linesize, 0, slice->src_h,
               dst.data, dst.linesize, y, h);
}
/**
 * Compute the horizontal/vertical chroma sample positions of `fmt`.
 *
 * An unspecified chroma location falls back to center siting, and marks the
 * graph as incomplete if the format is subsampled in either direction.
 * For directions without subsampling, -513 is stored instead of a position.
 */
static void get_chroma_pos(SwsGraph *graph, int *h_chr_pos, int *v_chr_pos,
                           const SwsFormat *fmt)
{
    const int log2_w = fmt->desc->log2_chroma_w;
    const int log2_h = fmt->desc->log2_chroma_h;
    enum AVChromaLocation loc = fmt->loc;
    int pos_x, pos_y;

    /* Explicitly default to center siting for compatibility with swscale */
    if (loc == AVCHROMA_LOC_UNSPECIFIED) {
        loc = AVCHROMA_LOC_CENTER;
        graph->incomplete |= log2_w || log2_h;
    }

    /* av_chroma_location_enum_to_pos() always yields values in the range
     * 0 to 256; rescale them to the true extent of the subsampling grid,
     * which may be larger when subsampling by more than a factor of two */
    av_chroma_location_enum_to_pos(&pos_x, &pos_y, loc);
    pos_x *= (1 << log2_w) - 1;
    pos_y *= (1 << log2_h) - 1;

    /* Fix vertical chroma position for interlaced frames */
    if (log2_h && fmt->interlaced) {
        /* With vertical subsampling, chroma samples effectively only sit next
         * to even rows. Reaching them from the odd field requires an offset
         * of one luma row.
         *
         * With 4x vertical subsampling (log2_h == 2) they only sit next to
         * every *other* even row, so the offset becomes three luma rows. */
        if (graph->field == FIELD_BOTTOM)
            pos_y += (256 << log2_h) - 256;

        /* Luma row distance is doubled for fields, so halve offsets */
        pos_y >>= 1;
    }

    /* Explicitly strip chroma offsets when not subsampling, because they
     * interfere with the operation of flags like SWS_FULL_CHR_H_INP */
    if (log2_w)
        *h_chr_pos = pos_x;
    else
        *h_chr_pos = -513;

    if (log2_h)
        *v_chr_pos = pos_y;
    else
        *v_chr_pos = -513;
}
/**
 * Apply a chroma position override coming from the legacy API.
 *
 * An override of -513, or one equal to the current value, is ignored.
 * Otherwise `*chr_pos` is replaced, and a deprecation warning is logged the
 * first time this happens (tracked through `*warned`).
 */
static void legacy_chr_pos(SwsGraph *graph, int *chr_pos, int override, int *warned)
{
    if (override != -513 && override != *chr_pos) {
        if (*warned == 0) {
            av_log(NULL, AV_LOG_WARNING,
                   "Setting chroma position directly is deprecated, make sure "
                   "the frame is tagged with the correct chroma location.\n");
            *warned = 1;
        }

        *chr_pos = override;
    }
}
/**
 * Append the passes needed to run one initialized legacy SwsContext.
 *
 * Ownership: this function consumes `sws` in every case, so the caller must
 * never free it afterwards.
 *  - Cascaded parent: every sub-context is detached and handed to a recursive
 *    call, then the parent itself is freed.
 *  - Regular context, success or late failure: the context is attached to the
 *    main pass and released through free_legacy_swscale() when the graph is
 *    freed.
 *  - Regular context, failure before the main pass exists: the context is
 *    freed here.
 *
 * @param graph  graph to append the passes to
 * @param sws    initialized legacy context (consumed)
 * @param input  pass feeding the new passes, or NULL to read the graph input
 * @param output set to the last pass that was added, on success only
 * @return 0 on success, a negative AVERROR code on failure
 */
static int init_legacy_subpass(SwsGraph *graph, SwsContext *sws,
                               SwsPass *input, SwsPass **output)
{
    SwsInternal *c = sws_internal(sws);
    const int src_w = sws->src_w, src_h = sws->src_h;
    const int dst_w = sws->dst_w, dst_h = sws->dst_h;
    const int unscaled = src_w == dst_w && src_h == dst_h;
    int align = c->dst_slice_align;
    SwsPass *pass = NULL;
    int ret = 0;

    if (c->cascaded_context[0]) {
        const int num_cascaded = c->cascaded_context[2] ? 3 : 2;

        for (int i = 0; i < num_cascaded; i++) {
            const int is_last = i + 1 == num_cascaded;

            /* Detach the sub-context *before* recursing: the callee takes
             * ownership of it, so it must no longer be reachable through the
             * parent, or it would be freed twice on error. */
            SwsContext *sub = c->cascaded_context[i];
            c->cascaded_context[i] = NULL;

            ret = init_legacy_subpass(graph, sub, input,
                                      is_last ? output : &input);
            if (ret < 0)
                break;
        }

        /* The parent is not needed by any pass; sub-contexts that are still
         * attached (after an early error) are left to sws_free_context(). */
        sws_free_context(&sws);
        return ret;
    }

    if (sws->dither == SWS_DITHER_ED && !c->convert_unscaled)
        align = 0; /* disable slice threading */

    if (c->src0Alpha && !c->dst0Alpha && isALPHA(sws->dst_format)) {
        ret = pass_append(graph, c, AV_PIX_FMT_RGBA, src_w, src_h, &input, 1, run_rgb0);
        if (ret < 0)
            goto fail;
    }

    if (c->srcXYZ && !(c->dstXYZ && unscaled)) {
        ret = pass_append(graph, c, AV_PIX_FMT_RGB48, src_w, src_h, &input, 1, run_xyz2rgb);
        if (ret < 0)
            goto fail;
    }

    pass = pass_add(graph, sws, sws->dst_format, dst_w, dst_h, input, align,
                    c->convert_unscaled ? run_legacy_unscaled : run_legacy_swscale);
    if (!pass) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
    pass->setup = setup_legacy_swscale;
    pass->free  = free_legacy_swscale;

    /* From here on the pass owns `sws`: errors below must simply return and
     * leave the cleanup to sws_graph_free(). */

    /**
     * For slice threading, we need to create sub contexts, similar to how
     * swscale normally handles it internally. The most important difference
     * is that we handle cascaded contexts before threaded contexts; whereas
     * context_init_threaded() does it the other way around.
     */
    if (pass->num_slices > 1) {
        c->slice_ctx = av_calloc(pass->num_slices, sizeof(*c->slice_ctx));
        if (!c->slice_ctx)
            return AVERROR(ENOMEM);

        for (int i = 0; i < pass->num_slices; i++) {
            SwsContext *slice;
            SwsInternal *c2;

            slice = c->slice_ctx[i] = sws_alloc_context();
            if (!slice)
                return AVERROR(ENOMEM);
            /* Only count slots that actually hold a context */
            c->nb_slice_ctx++;

            c2 = sws_internal(slice);
            c2->parent = sws;

            ret = av_opt_copy(slice, sws);
            if (ret < 0)
                return ret;

            ret = ff_sws_init_single_context(slice, NULL, NULL);
            if (ret < 0)
                return ret;

            sws_setColorspaceDetails(slice, c->srcColorspaceTable,
                                     slice->src_range, c->dstColorspaceTable,
                                     slice->dst_range, c->brightness, c->contrast,
                                     c->saturation);

            for (int k = 0; k < FF_ARRAY_ELEMS(c->srcColorspaceTable); k++) {
                c2->srcColorspaceTable[k] = c->srcColorspaceTable[k];
                c2->dstColorspaceTable[k] = c->dstColorspaceTable[k];
            }
        }
    }

    if (c->dstXYZ && !(c->srcXYZ && unscaled)) {
        ret = pass_append(graph, c, AV_PIX_FMT_RGB48, dst_w, dst_h, &pass, 1, run_rgb2xyz);
        if (ret < 0)
            return ret;
    }

    *output = pass;
    return 0;

fail:
    /* No pass owns the context yet, so it has to be released here */
    sws_free_context(&sws);
    return ret;
}

/**
 * Create a legacy SwsContext converting `src` to `dst` and append the
 * corresponding passes to the graph.
 *
 * Options are taken from graph->ctx. Chroma positions are derived from the
 * formats and may be overridden by the positions set on graph->ctx.
 * Unspecified color ranges, and unspecified colorspaces that differ between
 * source and destination, mark the graph as incomplete.
 *
 * @param input  pass feeding the conversion, or NULL to read the graph input
 * @param output set to the last pass that was added, on success only
 * @return 0 on success, a negative AVERROR code on failure
 */
static int add_legacy_sws_pass(SwsGraph *graph, SwsFormat src, SwsFormat dst,
                               SwsPass *input, SwsPass **output)
{
    int ret, warned = 0;
    SwsContext *const ctx = graph->ctx;
    SwsContext *sws = sws_alloc_context();
    if (!sws)
        return AVERROR(ENOMEM);

    sws->flags       = ctx->flags;
    sws->dither      = ctx->dither;
    sws->alpha_blend = ctx->alpha_blend;
    sws->gamma_flag  = ctx->gamma_flag;

    /* The scaler parameters are part of the options compared by opts_equal(),
     * so they have to reach the context doing the actual scaling as well */
    memcpy(sws->scaler_params, ctx->scaler_params, sizeof(sws->scaler_params));

    sws->src_w       = src.width;
    sws->src_h       = src.height;
    sws->src_format  = src.format;
    sws->src_range   = src.range == AVCOL_RANGE_JPEG;

    sws->dst_w       = dst.width;
    sws->dst_h       = dst.height;
    sws->dst_format  = dst.format;
    sws->dst_range   = dst.range == AVCOL_RANGE_JPEG;

    get_chroma_pos(graph, &sws->src_h_chr_pos, &sws->src_v_chr_pos, &src);
    get_chroma_pos(graph, &sws->dst_h_chr_pos, &sws->dst_v_chr_pos, &dst);

    graph->incomplete |= src.range == AVCOL_RANGE_UNSPECIFIED;
    graph->incomplete |= dst.range == AVCOL_RANGE_UNSPECIFIED;

    /* Allow overriding chroma position with the legacy API */
    legacy_chr_pos(graph, &sws->src_h_chr_pos, ctx->src_h_chr_pos, &warned);
    legacy_chr_pos(graph, &sws->src_v_chr_pos, ctx->src_v_chr_pos, &warned);
    legacy_chr_pos(graph, &sws->dst_h_chr_pos, ctx->dst_h_chr_pos, &warned);
    legacy_chr_pos(graph, &sws->dst_v_chr_pos, ctx->dst_v_chr_pos, &warned);

    ret = sws_init_context(sws, NULL, NULL);
    if (ret < 0) {
        sws_free_context(&sws);
        return ret;
    }

    /* Set correct color matrices */
    {
        int in_full, out_full, brightness, contrast, saturation;
        const int *inv_table, *table;

        /* Query the current details first so that everything except the
         * coefficient tables is passed back unchanged */
        sws_getColorspaceDetails(sws, (int **)&inv_table, &in_full,
                                 (int **)&table, &out_full,
                                 &brightness, &contrast, &saturation);

        inv_table = sws_getCoefficients(src.csp);
        table     = sws_getCoefficients(dst.csp);

        graph->incomplete |= src.csp != dst.csp &&
                             (src.csp == AVCOL_SPC_UNSPECIFIED ||
                              dst.csp == AVCOL_SPC_UNSPECIFIED);

        sws_setColorspaceDetails(sws, inv_table, in_full, table, out_full,
                                 brightness, contrast, saturation);
    }

    /* init_legacy_subpass() consumes `sws`, on failure as well; freeing it
     * again here would release a context that a pass may already own */
    return init_legacy_subpass(graph, sws, input, output);
}
/***************************************
* Main filter graph construction code *
***************************************/
/**
 * Build the list of passes for the graph.
 *
 * A legacy swscale pass is added whenever source and destination formats
 * differ. If no pass ends up being added, the graph is flagged as a no-op
 * and a plain copy pass is inserted instead.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int init_passes(SwsGraph *graph)
{
    const SwsFormat src = graph->src;
    const SwsFormat dst = graph->dst;
    SwsPass *last = NULL; /* NULL: read from main input image */

    if (!ff_fmt_equal(&src, &dst)) {
        const int err = add_legacy_sws_pass(graph, src, dst, last, &last);
        if (err < 0)
            return err;
    }

    if (last)
        return 0;

    /* No passes were added, so no operations were necessary */
    graph->noop = 1;

    /* Add threaded memcpy pass */
    last = pass_add(graph, NULL, dst.format, dst.width, dst.height, last, 1, run_copy);
    return last ? 0 : AVERROR(ENOMEM);
}
/**
 * Slice thread worker: executes slice number `jobnr` of the pass currently
 * stored in graph->exec.pass.
 *
 * The pass reads from its input pass' output image, or from the graph input
 * if it has no input pass. It writes to its own output image if one has been
 * allocated, and to the graph output otherwise. The last slice is clipped to
 * the remaining height of the pass.
 */
static void sws_graph_worker(void *priv, int jobnr, int threadnr, int nb_jobs,
                             int nb_threads)
{
    SwsGraph *graph       = priv;
    const SwsPass *pass   = graph->exec.pass;
    const int first_line  = jobnr * pass->slice_h;
    const int lines_left  = pass->height - first_line;
    const int num_lines   = FFMIN(pass->slice_h, lines_left);
    const SwsImg *src_img = &graph->exec.input;
    const SwsImg *dst_img = &graph->exec.output;

    if (pass->input)
        src_img = &pass->input->output;
    if (pass->output.fmt != AV_PIX_FMT_NONE)
        dst_img = &pass->output;

    pass->run(dst_img, src_img, first_line, num_lines, pass);
}
/**
 * Allocate and initialize a new filter graph converting `src` to `dst`.
 *
 * A snapshot of `*ctx` is stored alongside the graph. A slice thread context
 * is created with ctx->threads threads; AVERROR(ENOSYS) from that step is
 * tolerated and results in a thread count of 1.
 *
 * @param out_graph receives the new graph on success; untouched on failure
 * @return 0 on success, a negative AVERROR code on failure
 */
int sws_graph_create(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src,
                     int field, SwsGraph **out_graph)
{
    int ret;
    SwsGraph *graph = av_mallocz(sizeof(*graph));
    if (!graph)
        return AVERROR(ENOMEM);

    graph->ctx             = ctx;
    graph->src             = *src;
    graph->dst             = *dst;
    graph->field           = field;
    graph->opts_copy       = *ctx;
    graph->exec.input.fmt  = src->format;
    graph->exec.output.fmt = dst->format;

    ret = avpriv_slicethread_create(&graph->slicethread, (void *) graph,
                                    sws_graph_worker, NULL, ctx->threads);
    if (ret >= 0) {
        /* A non-negative result is the number of threads */
        graph->num_threads = ret;
    } else if (ret == AVERROR(ENOSYS)) {
        graph->num_threads = 1;
    } else {
        goto error;
    }

    ret = init_passes(graph);
    if (ret < 0)
        goto error;

    *out_graph = graph;
    return 0;

error:
    sws_graph_free(&graph);
    return ret;
}
/**
 * Free a graph together with all of its passes, and reset `*pgraph` to NULL.
 * Does nothing if `*pgraph` is already NULL.
 *
 * For every pass, the optional `free` callback is invoked on its private
 * data, and the intermediate output buffer is released if one was allocated.
 */
void sws_graph_free(SwsGraph **pgraph)
{
    SwsGraph *g = *pgraph;
    if (!g)
        return;

    avpriv_slicethread_free(&g->slicethread);

    for (int idx = 0; idx < g->num_passes; idx++) {
        SwsPass *p = g->passes[idx];

        if (p->free)
            p->free(p->priv);
        if (p->output.fmt != AV_PIX_FMT_NONE)
            av_free(p->output.data[0]);
        av_free(p);
    }

    av_free(g->passes);
    av_free(g);
    *pgraph = NULL;
}
/**
 * Compare two contexts, looking only at the options relevant to SwsGraph.
 *
 * @return 1 if all of those options match, 0 otherwise
 */
static int opts_equal(const SwsContext *c1, const SwsContext *c2)
{
    if (c1->flags       != c2->flags   ||
        c1->threads     != c2->threads ||
        c1->dither      != c2->dither  ||
        c1->alpha_blend != c2->alpha_blend ||
        c1->gamma_flag  != c2->gamma_flag)
        return 0;

    if (c1->src_h_chr_pos != c2->src_h_chr_pos ||
        c1->src_v_chr_pos != c2->src_v_chr_pos ||
        c1->dst_h_chr_pos != c2->dst_h_chr_pos ||
        c1->dst_v_chr_pos != c2->dst_v_chr_pos)
        return 0;

    return memcmp(c1->scaler_params, c2->scaler_params,
                  sizeof(c1->scaler_params)) == 0;
}
/**
 * Make sure `*out_graph` is a graph matching the given formats and options.
 *
 * An existing graph is kept as-is when its source format, destination format
 * and the graph-relevant options of `ctx` are all unchanged. Otherwise it is
 * freed and a new graph is created in its place.
 *
 * @return 0 on success, a negative AVERROR code if re-creation failed
 */
int sws_graph_reinit(SwsContext *ctx, const SwsFormat *dst, const SwsFormat *src,
                     int field, SwsGraph **out_graph)
{
    const SwsGraph *current = *out_graph;
    const int reusable = current &&
                         ff_fmt_equal(&current->src, src) &&
                         ff_fmt_equal(&current->dst, dst) &&
                         opts_equal(ctx, &current->opts_copy);

    if (reusable)
        return 0;

    sws_graph_free(out_graph);
    return sws_graph_create(ctx, dst, src, field, out_graph);
}
/**
 * Execute all passes of the graph, in order, on the given images.
 *
 * The plane pointers and line sizes are copied into the graph's exec state,
 * from where the workers pick them up. For every pass, the optional `setup`
 * callback runs first, followed by the slices of the pass.
 *
 * @param out_data     destination plane pointers
 * @param out_linesize destination line sizes
 * @param in_data      source plane pointers
 * @param in_linesize  source line sizes
 */
void sws_graph_run(SwsGraph *graph, uint8_t *const out_data[4],
                   const int out_linesize[4],
                   const uint8_t *const in_data[4],
                   const int in_linesize[4])
{
    SwsImg *out = &graph->exec.output;
    SwsImg *in  = &graph->exec.input;

    memcpy(out->data,     out_data,     sizeof(out->data));
    memcpy(out->linesize, out_linesize, sizeof(out->linesize));
    memcpy(in->data,      in_data,      sizeof(in->data));
    memcpy(in->linesize,  in_linesize,  sizeof(in->linesize));

    for (int i = 0; i < graph->num_passes; i++) {
        const SwsPass *pass = graph->passes[i];
        graph->exec.pass = pass;

        if (pass->setup) {
            /* Hand the setup callback the same images the workers will use
             * for this pass, not unconditionally the graph-level ones: a pass
             * fed by another pass reads that pass' intermediate output. */
            const SwsImg *pass_in  = pass->input ? &pass->input->output : in;
            const SwsImg *pass_out = pass->output.fmt != AV_PIX_FMT_NONE ?
                                     &pass->output : out;
            pass->setup(pass_out, pass_in, pass);
        }

        if (graph->slicethread) {
            avpriv_slicethread_execute(graph->slicethread, pass->num_slices, 0);
        } else {
            /* No slice thread context is available (sws_graph_create()
             * tolerates ENOSYS from avpriv_slicethread_create()), so run the
             * slices serially on the calling thread */
            for (int job = 0; job < pass->num_slices; job++)
                sws_graph_worker(graph, job, 0, pass->num_slices, 1);
        }
    }
}