avfilter/vf_fftdnoiz: add slice threading support

This commit is contained in:
Paul B Mahol 2021-10-18 18:03:16 +02:00
parent 762e18da3f
commit 374f2ac370
1 changed files with 154 additions and 82 deletions

View File

@ -25,6 +25,8 @@
#include "internal.h" #include "internal.h"
#include "libavcodec/avfft.h" #include "libavcodec/avfft.h"
#define MAX_THREADS 32
enum BufferTypes { enum BufferTypes {
CURRENT, CURRENT,
PREV, PREV,
@ -40,11 +42,9 @@ typedef struct PlaneContext {
float n; float n;
float *buffer[BSIZE]; float *buffer[BSIZE];
FFTComplex *hdata, *vdata; FFTComplex *hdata[MAX_THREADS], *vdata[MAX_THREADS];
int data_linesize; int data_linesize;
int buffer_linesize; int buffer_linesize;
FFTContext *fft, *ifft;
} PlaneContext; } PlaneContext;
typedef struct FFTdnoizContext { typedef struct FFTdnoizContext {
@ -62,8 +62,11 @@ typedef struct FFTdnoizContext {
int depth; int depth;
int nb_planes; int nb_planes;
int nb_threads;
PlaneContext planes[4]; PlaneContext planes[4];
FFTContext *fft[MAX_THREADS], *ifft[MAX_THREADS];
void (*import_row)(FFTComplex *dst, uint8_t *src, int rw); void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth); void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
} FFTdnoizContext; } FFTdnoizContext;
@ -90,23 +93,6 @@ static const AVOption fftdnoiz_options[] = {
AVFILTER_DEFINE_CLASS(fftdnoiz); AVFILTER_DEFINE_CLASS(fftdnoiz);
static av_cold int init(AVFilterContext *ctx)
{
FFTdnoizContext *s = ctx->priv;
int i;
for (i = 0; i < 4; i++) {
PlaneContext *p = &s->planes[i];
p->fft = av_fft_init(s->block_bits, 0);
p->ifft = av_fft_init(s->block_bits, 1);
if (!p->fft || !p->ifft)
return AVERROR(ENOMEM);
}
return 0;
}
static const enum AVPixelFormat pix_fmts[] = { static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
@ -201,6 +187,14 @@ static int config_input(AVFilterLink *inlink)
s->planes[0].planeheight = s->planes[3].planeheight = inlink->h; s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
s->nb_planes = av_pix_fmt_count_planes(inlink->format); s->nb_planes = av_pix_fmt_count_planes(inlink->format);
s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_THREADS);
for (int i = 0; i < s->nb_threads; i++) {
s->fft[i] = av_fft_init(s->block_bits, 0);
s->ifft[i] = av_fft_init(s->block_bits, 1);
if (!s->fft[i] || !s->ifft[i])
return AVERROR(ENOMEM);
}
for (i = 0; i < s->nb_planes; i++) { for (i = 0; i < s->nb_planes; i++) {
PlaneContext *p = &s->planes[i]; PlaneContext *p = &s->planes[i];
@ -230,18 +224,21 @@ static int config_input(AVFilterLink *inlink)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
p->data_linesize = 2 * p->b * sizeof(float); p->data_linesize = 2 * p->b * sizeof(float);
p->hdata = av_calloc(p->b, p->data_linesize); for (int j = 0; j < s->nb_threads; j++) {
p->vdata = av_calloc(p->b, p->data_linesize); p->hdata[j] = av_calloc(p->b, p->data_linesize);
if (!p->hdata || !p->vdata) p->vdata[j] = av_calloc(p->b, p->data_linesize);
if (!p->hdata[j] || !p->vdata[j])
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
}
return 0; return 0;
} }
static void import_plane(FFTdnoizContext *s, static void import_plane(FFTdnoizContext *s,
uint8_t *srcp, int src_linesize, uint8_t *srcp, int src_linesize,
float *buffer, int buffer_linesize, int plane) float *buffer, int buffer_linesize, int plane,
int jobnr, int nb_jobs)
{ {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
const int width = p->planewidth; const int width = p->planewidth;
@ -253,12 +250,14 @@ static void import_plane(FFTdnoizContext *s,
const int noy = p->noy; const int noy = p->noy;
const int bpp = (s->depth + 7) / 8; const int bpp = (s->depth + 7) / 8;
const int data_linesize = p->data_linesize / sizeof(FFTComplex); const int data_linesize = p->data_linesize / sizeof(FFTComplex);
FFTComplex *hdata = p->hdata; const int slice_start = (noy * jobnr) / nb_jobs;
FFTComplex *vdata = p->vdata; const int slice_end = (noy * (jobnr+1)) / nb_jobs;
FFTComplex *hdata = p->hdata[jobnr];
FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j; int x, y, i, j;
buffer_linesize /= sizeof(float); buffer_linesize /= sizeof(float);
for (y = 0; y < noy; y++) { for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) { for (x = 0; x < nox; x++) {
const int rh = FFMIN(block, height - y * size); const int rh = FFMIN(block, height - y * size);
const int rw = FFMIN(block, width - x * size); const int rw = FFMIN(block, width - x * size);
@ -272,8 +271,8 @@ static void import_plane(FFTdnoizContext *s,
dst[j].re = dst[block - j - 1].re; dst[j].re = dst[block - j - 1].re;
dst[j].im = 0; dst[j].im = 0;
} }
av_fft_permute(p->fft, dst); av_fft_permute(s->fft[jobnr], dst);
av_fft_calc(p->fft, dst); av_fft_calc(s->fft[jobnr], dst);
src += src_linesize; src += src_linesize;
dst += data_linesize; dst += data_linesize;
@ -292,8 +291,8 @@ static void import_plane(FFTdnoizContext *s,
for (i = 0; i < block; i++) { for (i = 0; i < block; i++) {
for (j = 0; j < block; j++) for (j = 0; j < block; j++)
dst[j] = ssrc[j * data_linesize + i]; dst[j] = ssrc[j * data_linesize + i];
av_fft_permute(p->fft, dst); av_fft_permute(s->fft[jobnr], dst);
av_fft_calc(p->fft, dst); av_fft_calc(s->fft[jobnr], dst);
memcpy(bdst, dst, block * sizeof(FFTComplex)); memcpy(bdst, dst, block * sizeof(FFTComplex));
dst += data_linesize; dst += data_linesize;
@ -305,7 +304,8 @@ static void import_plane(FFTdnoizContext *s,
static void export_plane(FFTdnoizContext *s, static void export_plane(FFTdnoizContext *s,
uint8_t *dstp, int dst_linesize, uint8_t *dstp, int dst_linesize,
float *buffer, int buffer_linesize, int plane) float *buffer, int buffer_linesize, int plane,
int jobnr, int nb_jobs)
{ {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
const int depth = s->depth; const int depth = s->depth;
@ -320,12 +320,14 @@ static void export_plane(FFTdnoizContext *s,
const int noy = p->noy; const int noy = p->noy;
const int data_linesize = p->data_linesize / sizeof(FFTComplex); const int data_linesize = p->data_linesize / sizeof(FFTComplex);
const float scale = 1.f / (block * block); const float scale = 1.f / (block * block);
FFTComplex *hdata = p->hdata; const int slice_start = (noy * jobnr) / nb_jobs;
FFTComplex *vdata = p->vdata; const int slice_end = (noy * (jobnr+1)) / nb_jobs;
FFTComplex *hdata = p->hdata[jobnr];
FFTComplex *vdata = p->vdata[jobnr];
int x, y, i, j; int x, y, i, j;
buffer_linesize /= sizeof(float); buffer_linesize /= sizeof(float);
for (y = 0; y < noy; y++) { for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) { for (x = 0; x < nox; x++) {
const int woff = x == 0 ? 0 : hoverlap; const int woff = x == 0 ? 0 : hoverlap;
const int hoff = y == 0 ? 0 : hoverlap; const int hoff = y == 0 ? 0 : hoverlap;
@ -338,8 +340,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata; hdst = hdata;
for (i = 0; i < block; i++) { for (i = 0; i < block; i++) {
memcpy(ddst, bsrc, block * sizeof(FFTComplex)); memcpy(ddst, bsrc, block * sizeof(FFTComplex));
av_fft_permute(p->ifft, ddst); av_fft_permute(s->ifft[jobnr], ddst);
av_fft_calc(p->ifft, ddst); av_fft_calc(s->ifft[jobnr], ddst);
for (j = 0; j < block; j++) { for (j = 0; j < block; j++) {
hdst[j * data_linesize + i] = ddst[j]; hdst[j * data_linesize + i] = ddst[j];
} }
@ -350,8 +352,8 @@ static void export_plane(FFTdnoizContext *s,
hdst = hdata + hoff * data_linesize; hdst = hdata + hoff * data_linesize;
for (i = 0; i < rh; i++) { for (i = 0; i < rh; i++) {
av_fft_permute(p->ifft, hdst); av_fft_permute(s->ifft[jobnr], hdst);
av_fft_calc(p->ifft, hdst); av_fft_calc(s->ifft[jobnr], hdst);
s->export_row(hdst + woff, dst, rw, scale, depth); s->export_row(hdst + woff, dst, rw, scale, depth);
hdst += data_linesize; hdst += data_linesize;
@ -361,7 +363,8 @@ static void export_plane(FFTdnoizContext *s,
} }
} }
static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer) static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer,
int jobnr, int nb_jobs)
{ {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
const int block = p->b; const int block = p->b;
@ -369,13 +372,15 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
const int noy = p->noy; const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float); const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block; const float sigma = s->sigma * s->sigma * block * block;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount; const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT]; float *cbuffer = p->buffer[CURRENT];
const float cfactor = sqrtf(3.f) * 0.5f; const float cfactor = sqrtf(3.f) * 0.5f;
const float scale = 1.f / 3.f; const float scale = 1.f / 3.f;
int y, x, i, j; int y, x, i, j;
for (y = 0; y < noy; y++) { for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) { for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2; float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2; float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@ -421,7 +426,8 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
} }
} }
static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer) static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer,
int jobnr, int nb_jobs)
{ {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
const int block = p->b; const int block = p->b;
@ -429,11 +435,13 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
const int noy = p->noy; const int noy = p->noy;
const int buffer_linesize = p->buffer_linesize / sizeof(float); const int buffer_linesize = p->buffer_linesize / sizeof(float);
const float sigma = s->sigma * s->sigma * block * block; const float sigma = s->sigma * s->sigma * block * block;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
const float limit = 1.f - s->amount; const float limit = 1.f - s->amount;
float *cbuffer = p->buffer[CURRENT]; float *cbuffer = p->buffer[CURRENT];
int y, x, i, j; int y, x, i, j;
for (y = 0; y < noy; y++) { for (y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) { for (x = 0; x < nox; x++) {
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2; float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2; float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
@ -473,7 +481,8 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
} }
} }
static void filter_plane2d(FFTdnoizContext *s, int plane) static void filter_plane2d(FFTdnoizContext *s, int plane,
int jobnr, int nb_jobs)
{ {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
const int block = p->b; const int block = p->b;
@ -482,15 +491,16 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
const int buffer_linesize = p->buffer_linesize / 4; const int buffer_linesize = p->buffer_linesize / 4;
const float sigma = s->sigma * s->sigma * block * block; const float sigma = s->sigma * s->sigma * block * block;
const float limit = 1.f - s->amount; const float limit = 1.f - s->amount;
const int slice_start = (noy * jobnr) / nb_jobs;
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
float *buffer = p->buffer[CURRENT]; float *buffer = p->buffer[CURRENT];
int y, x, i, j;
for (y = 0; y < noy; y++) { for (int y = slice_start; y < slice_end; y++) {
for (x = 0; x < nox; x++) { for (int x = 0; x < nox; x++) {
float *buff = buffer + buffer_linesize * y * block + x * block * 2; float *buff = buffer + buffer_linesize * y * block + x * block * 2;
for (i = 0; i < block; i++) { for (int i = 0; i < block; i++) {
for (j = 0; j < block; j++) { for (int j = 0; j < block; j++) {
float factor, power, re, im; float factor, power, re, im;
re = buff[j * 2 ]; re = buff[j * 2 ];
@ -507,6 +517,79 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
} }
} }
static int import_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
continue;
if (s->next) {
import_plane(s, s->next->data[plane], s->next->linesize[plane],
p->buffer[NEXT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
if (s->prev) {
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
p->buffer[PREV], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
return 0;
}
static int filter_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
continue;
if (s->next && s->prev) {
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT], jobnr, nb_jobs);
} else if (s->next) {
filter_plane3d1(s, plane, p->buffer[NEXT], jobnr, nb_jobs);
} else if (s->prev) {
filter_plane3d1(s, plane, p->buffer[PREV], jobnr, nb_jobs);
} else {
filter_plane2d(s, plane, jobnr, nb_jobs);
}
}
return 0;
}
static int export_pass(AVFilterContext *ctx, void *arg,
int jobnr, int nb_jobs)
{
FFTdnoizContext *s = ctx->priv;
AVFrame *out = arg;
for (int plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane];
export_plane(s, out->data[plane], out->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane,
jobnr, nb_jobs);
}
return 0;
}
static int filter_frame(AVFilterLink *inlink, AVFrame *in) static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{ {
AVFilterContext *ctx = inlink->dst; AVFilterContext *ctx = inlink->dst;
@ -559,6 +642,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
av_frame_copy_props(out, s->cur); av_frame_copy_props(out, s->cur);
} }
ff_filter_execute(ctx, import_pass, NULL, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
ff_filter_execute(ctx, filter_pass, NULL, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
ff_filter_execute(ctx, export_pass, out, NULL,
FFMIN(s->planes[0].noy, s->nb_threads));
for (plane = 0; plane < s->nb_planes; plane++) { for (plane = 0; plane < s->nb_planes; plane++) {
PlaneContext *p = &s->planes[plane]; PlaneContext *p = &s->planes[plane];
@ -569,32 +661,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
p->planewidth, p->planeheight); p->planewidth, p->planeheight);
continue; continue;
} }
if (s->next) {
import_plane(s, s->next->data[plane], s->next->linesize[plane],
p->buffer[NEXT], p->buffer_linesize, plane);
}
if (s->prev) {
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
p->buffer[PREV], p->buffer_linesize, plane);
}
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane);
if (s->next && s->prev) {
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
} else if (s->next) {
filter_plane3d1(s, plane, p->buffer[NEXT]);
} else if (s->prev) {
filter_plane3d1(s, plane, p->buffer[PREV]);
} else {
filter_plane2d(s, plane);
}
export_plane(s, out->data[plane], out->linesize[plane],
p->buffer[CURRENT], p->buffer_linesize, plane);
} }
if (s->nb_next == 0 && s->nb_prev == 0) { if (s->nb_next == 0 && s->nb_prev == 0) {
@ -644,13 +710,19 @@ static av_cold void uninit(AVFilterContext *ctx)
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
PlaneContext *p = &s->planes[i]; PlaneContext *p = &s->planes[i];
av_freep(&p->hdata); for (int j = 0; j < s->nb_threads; j++) {
av_freep(&p->vdata); av_freep(&p->hdata[j]);
av_freep(&p->vdata[j]);
}
av_freep(&p->buffer[PREV]); av_freep(&p->buffer[PREV]);
av_freep(&p->buffer[CURRENT]); av_freep(&p->buffer[CURRENT]);
av_freep(&p->buffer[NEXT]); av_freep(&p->buffer[NEXT]);
av_fft_end(p->fft); }
av_fft_end(p->ifft);
for (i = 0; i < s->nb_threads; i++) {
av_fft_end(s->fft[i]);
av_fft_end(s->ifft[i]);
} }
av_frame_free(&s->prev); av_frame_free(&s->prev);
@ -679,11 +751,11 @@ const AVFilter ff_vf_fftdnoiz = {
.name = "fftdnoiz", .name = "fftdnoiz",
.description = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."), .description = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
.priv_size = sizeof(FFTdnoizContext), .priv_size = sizeof(FFTdnoizContext),
.init = init,
.uninit = uninit, .uninit = uninit,
FILTER_INPUTS(fftdnoiz_inputs), FILTER_INPUTS(fftdnoiz_inputs),
FILTER_OUTPUTS(fftdnoiz_outputs), FILTER_OUTPUTS(fftdnoiz_outputs),
FILTER_PIXFMTS_ARRAY(pix_fmts), FILTER_PIXFMTS_ARRAY(pix_fmts),
.priv_class = &fftdnoiz_class, .priv_class = &fftdnoiz_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
AVFILTER_FLAG_SLICE_THREADS,
}; };