avfilter/vf_fftdnoiz: add slice threading support

This commit is contained in:
Paul B Mahol 2021-10-18 18:03:16 +02:00
parent 762e18da3f
commit 374f2ac370
1 changed files with 154 additions and 82 deletions

View File

@ -25,6 +25,8 @@
#include "internal.h"
#include "libavcodec/avfft.h"
#define MAX_THREADS 32
enum BufferTypes {
CURRENT,
PREV,
@ -40,11 +42,9 @@ typedef struct PlaneContext {
float n;
float *buffer[BSIZE];
FFTComplex *hdata, *vdata;
FFTComplex *hdata[MAX_THREADS], *vdata[MAX_THREADS];
int data_linesize;
int buffer_linesize;
FFTContext *fft, *ifft;
} PlaneContext;
typedef struct FFTdnoizContext {
@ -62,8 +62,11 @@ typedef struct FFTdnoizContext {
int depth;
int nb_planes;
int nb_threads;
PlaneContext planes[4];
FFTContext *fft[MAX_THREADS], *ifft[MAX_THREADS];
void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
} FFTdnoizContext;
@ -90,23 +93,6 @@ static const AVOption fftdnoiz_options[] = {
AVFILTER_DEFINE_CLASS(fftdnoiz);
static av_cold int init(AVFilterContext *ctx)
{
FFTdnoizContext *s = ctx->priv;
int i;
for (i = 0; i < 4; i++) {
PlaneContext *p = &s->planes[i];
p->fft = av_fft_init(s->block_bits, 0);
p->ifft = av_fft_init(s->block_bits, 1);
if (!p->fft || !p->ifft)
return AVERROR(ENOMEM);
}
return 0;
}
static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
@ -201,6 +187,14 @@ static int config_input(AVFilterLink *inlink)
s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
s->nb_planes = av_pix_fmt_count_planes(inlink->format);
s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_THREADS);
for (int i = 0; i < s->nb_threads; i++) {
s->fft[i] = av_fft_init(s->block_bits, 0);
s->ifft[i] = av_fft_init(s->block_bits, 1);
if (!s->fft[i] || !s->ifft[i])
return AVERROR(ENOMEM);
}
for (i = 0; i < s->nb_planes; i++) {
PlaneContext *p = &s->planes[i];
@ -230,10 +224,12 @@ static int config_input(AVFilterLink *inlink)
return AVERROR(ENOMEM);
}
p->data_linesize = 2 * p->b * sizeof(float);
p->hdata = av_calloc(p->b, p->data_linesize);
p->vdata = av_calloc(p->b, p->data_linesize);
if (!p->hdata || !p->vdata)
return AVERROR(ENOMEM);
for (int j = 0; j < s->nb_threads; j++) {
p->hdata[j] = av_calloc(p->b, p->data_linesize);
p->vdata[j] = av_calloc(p->b, p->data_linesize);
if (!p->hdata[j] || !p->vdata[j])
return AVERROR(ENOMEM);
}
}
return 0;
@ -241,7 +237,8 @@ static int config_input(AVFilterLink *inlink)
static void import_plane(FFTdnoizContext *s,
uint8_t *srcp, int src_linesize,
float *buffer, int buffer_linesize, int plane)
float *buffer, int buffer_linesize, int plane,
int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int width = p->planewidth;
@ -253,12 +250,14 @@ static void import_plane(FFTdnoizContext *s,
const int noy = p->noy;
const int bpp = (s->depth + 7) / 8;
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
FFTComplex *hdata = p->hdata;
FFTComplex *vdata = p->vdata;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
FFTComplex *hdata = p->hdata[jobnr];
FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j;
buffer_linesize /= sizeof(float);
for (y = 0; y < noy; y++) {
for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
const int rh = FFMIN(block, height - y * size);
const int rw = FFMIN(block, width - x * size);
@ -272,8 +271,8 @@ static void import_plane(FFTdnoizContext *s,
dst[j].re = dst[block - j - 1].re;
dst[j].im = 0;
}
av_fft_permute(p->fft, dst);
av_fft_calc(p->fft, dst);
av_fft_permute(s->fft[jobnr], dst);
av_fft_calc(s->fft[jobnr], dst);
src += src_linesize;
dst += data_linesize;
@ -292,8 +291,8 @@ static void import_plane(FFTdnoizContext *s,
for (i = 0; i < block; i++) {
for (j = 0; j < block; j++)
dst[j] = ssrc[j * data_linesize + i];
av_fft_permute(p->fft, dst);
av_fft_calc(p->fft, dst);
av_fft_permute(s->fft[jobnr], dst);
av_fft_calc(s->fft[jobnr], dst);
memcpy(bdst, dst, block * sizeof(FFTComplex));
dst += data_linesize;
@ -305,7 +304,8 @@ static void import_plane(FFTdnoizContext *s,
static void export_plane(FFTdnoizContext *s,
uint8_t *dstp, int dst_linesize,
float *buffer, int buffer_linesize, int plane)
float *buffer, int buffer_linesize, int plane,
int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int depth = s->depth;
@ -320,12 +320,14 @@ static void export_plane(FFTdnoizContext *s,
const int noy = p->noy;
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
const float scale = 1.f / (block * block);
FFTComplex *hdata = p->hdata;
FFTComplex *vdata = p->vdata;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
FFTComplex *hdata = p->hdata[jobnr];
FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j;
buffer_linesize /= sizeof(float);
for (y = 0; y < noy; y++) {
for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
const int woff = x == 0 ? 0 : hoverlap;
const int hoff = y == 0 ? 0 : hoverlap;
@ -338,8 +340,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata;
for (i = 0; i < block; i++) {
memcpy(ddst, bsrc, block * sizeof(FFTComplex));
av_fft_permute(p->ifft, ddst);
av_fft_calc(p->ifft, ddst);
av_fft_permute(s->ifft[jobnr], ddst);
av_fft_calc(s->ifft[jobnr], ddst);
for (j = 0; j < block; j++) {
hdst[j * data_linesize + i] = ddst[j];
}
@ -350,8 +352,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata + hoff * data_linesize;
for (i = 0; i < rh; i++) {
av_fft_permute(p->ifft, hdst);
av_fft_calc(p->ifft, hdst);
av_fft_permute(s->ifft[jobnr], hdst);
av_fft_calc(s->ifft[jobnr], hdst);
s->export_row(hdst + woff, dst, rw, scale, depth);
hdst += data_linesize;
@ -361,7 +363,8 @@ static void export_plane(FFTdnoizContext *s,
}
}
static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer)
static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer,
int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@ -369,13 +372,15 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT];
const float cfactor = sqrtf(3.f) * 0.5f;
const float scale = 1.f / 3.f;
int y, x, i, j;
for (y = 0; y < noy; y++) {
for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@ -421,7 +426,8 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
}
}
static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer,
int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@ -429,11 +435,13 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT];
int y, x, i, j;
for (y = 0; y < noy; y++) {
for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@ -473,7 +481,8 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
}
}
static void filter_plane2d(FFTdnoizContext *s, int plane)
static void filter_plane2d(FFTdnoizContext *s, int plane,
int jobnr, int nb_jobs)
{
PlaneContext *p = &s->planes[plane];
const int block = p->b;
@ -482,15 +491,16 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
const int buffer_linesize = p->buffer_linesize / 4;
const float sigma = s->sigma * s->sigma * block * block;
const float limit = 1.f - s->amount;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
float *buffer = p->buffer[CURRENT];
int y, x, i, j;
for (y = 0; y < noy; y++) {
for (x = 0; x < nox; x++) {
for (int y = slice_start; y < slice_end; y++) {
for (int x = 0; x < nox; x++) {
float *buff = buffer + buffer_linesize * y * block + x * block * 2;
for (i = 0; i < block; i++) {
for (j = 0; j < block; j++) {
for (int i = 0; i < block; i++) {
for (int j = 0; j < block; j++) {
float factor, power, re, im;
re = buff[j * 2 ];
@ -507,6 +517,79 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
}
}
static int import_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
continue;
if (s->next) {
import_plane(s, s->next->data[plane], s->next->linesize[plane],
p->buffer[NEXT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
if (s->prev) {
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
p->buffer[PREV], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
return 0;
}
static int filter_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
continue;
if (s->next && s->prev) {
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT], jobnr, nb_jobs);
} else if (s->next) {
filter_plane3d1(s, plane, p->buffer[NEXT], jobnr, nb_jobs);
} else if (s->prev) {
filter_plane3d1(s, plane, p->buffer[PREV], jobnr, nb_jobs);
} else {
filter_plane2d(s, plane, jobnr, nb_jobs);
}
}
return 0;
}
static int export_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
AVFrame *out = arg;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
export_plane(s, out->data[plane], out->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
return 0;
}
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
AVFilterContext *ctx = inlink->dst;
@ -559,6 +642,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, s->cur);
}
ff_filter_execute(ctx, import_pass, NULL, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
ff_filter_execute(ctx, filter_pass, NULL, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
ff_filter_execute(ctx, export_pass, out, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
for (plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
@ -569,32 +661,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
p->planewidth, p->planeheight);
continue;
}
if (s->next) {
import_plane(s, s->next->data[plane], s->next->linesize[plane],
p->buffer[NEXT], p->buffer_linesize, plane);
}
if (s->prev) {
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
p->buffer[PREV], p->buffer_linesize, plane);
}
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane);
if (s->next && s->prev) {
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
} else if (s->next) {
filter_plane3d1(s, plane, p->buffer[NEXT]);
} else if (s->prev) {
filter_plane3d1(s, plane, p->buffer[PREV]);
} else {
filter_plane2d(s, plane);
}
export_plane(s, out->data[plane], out->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane);
}
if (s->nb_next == 0 && s->nb_prev == 0) {
@ -644,13 +710,19 @@ static av_cold void uninit(AVFilterContext *ctx)
for (i = 0; i < 4; i++) {
PlaneContext *p = &s->planes[i];
av_freep(&p->hdata);
av_freep(&p->vdata);
for (int j = 0; j < s->nb_threads; j++) {
av_freep(&p->hdata[j]);
av_freep(&p->vdata[j]);
}
av_freep(&p->buffer[PREV]);
av_freep(&p->buffer[CURRENT]);
av_freep(&p->buffer[NEXT]);
av_fft_end(p->fft);
av_fft_end(p->ifft);
}
for (i = 0; i < s->nb_threads; i++) {
av_fft_end(s->fft[i]);
av_fft_end(s->ifft[i]);
}
av_frame_free(&s->prev);
@ -679,11 +751,11 @@ const AVFilter ff_vf_fftdnoiz = {
.name = "fftdnoiz",
.description = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
.priv_size = sizeof(FFTdnoizContext),
.init = init,
.uninit = uninit,
FILTER_INPUTS(fftdnoiz_inputs),
FILTER_OUTPUTS(fftdnoiz_outputs),
FILTER_PIXFMTS_ARRAY(pix_fmts),
.priv_class = &fftdnoiz_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
AVFILTER_FLAG_SLICE_THREADS,
};