mirror of https://git.ffmpeg.org/ffmpeg.git
avfilter/vf_fftdnoiz: add slice threading support
This commit is contained in:
parent
762e18da3f
commit
374f2ac370
|
@ -25,6 +25,8 @@
|
|||
#include "internal.h"
|
||||
#include "libavcodec/avfft.h"
|
||||
|
||||
#define MAX_THREADS 32
|
||||
|
||||
enum BufferTypes {
|
||||
CURRENT,
|
||||
PREV,
|
||||
|
@ -40,11 +42,9 @@ typedef struct PlaneContext {
|
|||
float n;
|
||||
|
||||
float *buffer[BSIZE];
|
||||
FFTComplex *hdata, *vdata;
|
||||
FFTComplex *hdata[MAX_THREADS], *vdata[MAX_THREADS];
|
||||
int data_linesize;
|
||||
int buffer_linesize;
|
||||
|
||||
FFTContext *fft, *ifft;
|
||||
} PlaneContext;
|
||||
|
||||
typedef struct FFTdnoizContext {
|
||||
|
@ -62,8 +62,11 @@ typedef struct FFTdnoizContext {
|
|||
|
||||
int depth;
|
||||
int nb_planes;
|
||||
int nb_threads;
|
||||
PlaneContext planes[4];
|
||||
|
||||
FFTContext *fft[MAX_THREADS], *ifft[MAX_THREADS];
|
||||
|
||||
void (*import_row)(FFTComplex *dst, uint8_t *src, int rw);
|
||||
void (*export_row)(FFTComplex *src, uint8_t *dst, int rw, float scale, int depth);
|
||||
} FFTdnoizContext;
|
||||
|
@ -90,23 +93,6 @@ static const AVOption fftdnoiz_options[] = {
|
|||
|
||||
AVFILTER_DEFINE_CLASS(fftdnoiz);
|
||||
|
||||
static av_cold int init(AVFilterContext *ctx)
|
||||
{
|
||||
FFTdnoizContext *s = ctx->priv;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
PlaneContext *p = &s->planes[i];
|
||||
|
||||
p->fft = av_fft_init(s->block_bits, 0);
|
||||
p->ifft = av_fft_init(s->block_bits, 1);
|
||||
if (!p->fft || !p->ifft)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const enum AVPixelFormat pix_fmts[] = {
|
||||
AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
|
||||
AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12,
|
||||
|
@ -201,6 +187,14 @@ static int config_input(AVFilterLink *inlink)
|
|||
s->planes[0].planeheight = s->planes[3].planeheight = inlink->h;
|
||||
|
||||
s->nb_planes = av_pix_fmt_count_planes(inlink->format);
|
||||
s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), MAX_THREADS);
|
||||
|
||||
for (int i = 0; i < s->nb_threads; i++) {
|
||||
s->fft[i] = av_fft_init(s->block_bits, 0);
|
||||
s->ifft[i] = av_fft_init(s->block_bits, 1);
|
||||
if (!s->fft[i] || !s->ifft[i])
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
for (i = 0; i < s->nb_planes; i++) {
|
||||
PlaneContext *p = &s->planes[i];
|
||||
|
@ -230,10 +224,12 @@ static int config_input(AVFilterLink *inlink)
|
|||
return AVERROR(ENOMEM);
|
||||
}
|
||||
p->data_linesize = 2 * p->b * sizeof(float);
|
||||
p->hdata = av_calloc(p->b, p->data_linesize);
|
||||
p->vdata = av_calloc(p->b, p->data_linesize);
|
||||
if (!p->hdata || !p->vdata)
|
||||
return AVERROR(ENOMEM);
|
||||
for (int j = 0; j < s->nb_threads; j++) {
|
||||
p->hdata[j] = av_calloc(p->b, p->data_linesize);
|
||||
p->vdata[j] = av_calloc(p->b, p->data_linesize);
|
||||
if (!p->hdata[j] || !p->vdata[j])
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -241,7 +237,8 @@ static int config_input(AVFilterLink *inlink)
|
|||
|
||||
static void import_plane(FFTdnoizContext *s,
|
||||
uint8_t *srcp, int src_linesize,
|
||||
float *buffer, int buffer_linesize, int plane)
|
||||
float *buffer, int buffer_linesize, int plane,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
const int width = p->planewidth;
|
||||
|
@ -253,12 +250,14 @@ static void import_plane(FFTdnoizContext *s,
|
|||
const int noy = p->noy;
|
||||
const int bpp = (s->depth + 7) / 8;
|
||||
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
|
||||
FFTComplex *hdata = p->hdata;
|
||||
FFTComplex *vdata = p->vdata;
|
||||
const int slice_start = (noy * jobnr) / nb_jobs;
|
||||
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
|
||||
FFTComplex *hdata = p->hdata[jobnr];
|
||||
FFTComplex *vdata = p->vdata[jobnr];
|
||||
int x, y, i, j;
|
||||
|
||||
buffer_linesize /= sizeof(float);
|
||||
for (y = 0; y < noy; y++) {
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
for (x = 0; x < nox; x++) {
|
||||
const int rh = FFMIN(block, height - y * size);
|
||||
const int rw = FFMIN(block, width - x * size);
|
||||
|
@ -272,8 +271,8 @@ static void import_plane(FFTdnoizContext *s,
|
|||
dst[j].re = dst[block - j - 1].re;
|
||||
dst[j].im = 0;
|
||||
}
|
||||
av_fft_permute(p->fft, dst);
|
||||
av_fft_calc(p->fft, dst);
|
||||
av_fft_permute(s->fft[jobnr], dst);
|
||||
av_fft_calc(s->fft[jobnr], dst);
|
||||
|
||||
src += src_linesize;
|
||||
dst += data_linesize;
|
||||
|
@ -292,8 +291,8 @@ static void import_plane(FFTdnoizContext *s,
|
|||
for (i = 0; i < block; i++) {
|
||||
for (j = 0; j < block; j++)
|
||||
dst[j] = ssrc[j * data_linesize + i];
|
||||
av_fft_permute(p->fft, dst);
|
||||
av_fft_calc(p->fft, dst);
|
||||
av_fft_permute(s->fft[jobnr], dst);
|
||||
av_fft_calc(s->fft[jobnr], dst);
|
||||
memcpy(bdst, dst, block * sizeof(FFTComplex));
|
||||
|
||||
dst += data_linesize;
|
||||
|
@ -305,7 +304,8 @@ static void import_plane(FFTdnoizContext *s,
|
|||
|
||||
static void export_plane(FFTdnoizContext *s,
|
||||
uint8_t *dstp, int dst_linesize,
|
||||
float *buffer, int buffer_linesize, int plane)
|
||||
float *buffer, int buffer_linesize, int plane,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
const int depth = s->depth;
|
||||
|
@ -320,12 +320,14 @@ static void export_plane(FFTdnoizContext *s,
|
|||
const int noy = p->noy;
|
||||
const int data_linesize = p->data_linesize / sizeof(FFTComplex);
|
||||
const float scale = 1.f / (block * block);
|
||||
FFTComplex *hdata = p->hdata;
|
||||
FFTComplex *vdata = p->vdata;
|
||||
const int slice_start = (noy * jobnr) / nb_jobs;
|
||||
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
|
||||
FFTComplex *hdata = p->hdata[jobnr];
|
||||
FFTComplex *vdata = p->vdata[jobnr];
|
||||
int x, y, i, j;
|
||||
|
||||
buffer_linesize /= sizeof(float);
|
||||
for (y = 0; y < noy; y++) {
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
for (x = 0; x < nox; x++) {
|
||||
const int woff = x == 0 ? 0 : hoverlap;
|
||||
const int hoff = y == 0 ? 0 : hoverlap;
|
||||
|
@ -338,8 +340,8 @@ static void export_plane(FFTdnoizContext *s,
|
|||
hdst = hdata;
|
||||
for (i = 0; i < block; i++) {
|
||||
memcpy(ddst, bsrc, block * sizeof(FFTComplex));
|
||||
av_fft_permute(p->ifft, ddst);
|
||||
av_fft_calc(p->ifft, ddst);
|
||||
av_fft_permute(s->ifft[jobnr], ddst);
|
||||
av_fft_calc(s->ifft[jobnr], ddst);
|
||||
for (j = 0; j < block; j++) {
|
||||
hdst[j * data_linesize + i] = ddst[j];
|
||||
}
|
||||
|
@ -350,8 +352,8 @@ static void export_plane(FFTdnoizContext *s,
|
|||
|
||||
hdst = hdata + hoff * data_linesize;
|
||||
for (i = 0; i < rh; i++) {
|
||||
av_fft_permute(p->ifft, hdst);
|
||||
av_fft_calc(p->ifft, hdst);
|
||||
av_fft_permute(s->ifft[jobnr], hdst);
|
||||
av_fft_calc(s->ifft[jobnr], hdst);
|
||||
s->export_row(hdst + woff, dst, rw, scale, depth);
|
||||
|
||||
hdst += data_linesize;
|
||||
|
@ -361,7 +363,8 @@ static void export_plane(FFTdnoizContext *s,
|
|||
}
|
||||
}
|
||||
|
||||
static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer)
|
||||
static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float *nbuffer,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
const int block = p->b;
|
||||
|
@ -369,13 +372,15 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
|
|||
const int noy = p->noy;
|
||||
const int buffer_linesize = p->buffer_linesize / sizeof(float);
|
||||
const float sigma = s->sigma * s->sigma * block * block;
|
||||
const int slice_start = (noy * jobnr) / nb_jobs;
|
||||
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
|
||||
const float limit = 1.f - s->amount;
|
||||
float *cbuffer = p->buffer[CURRENT];
|
||||
const float cfactor = sqrtf(3.f) * 0.5f;
|
||||
const float scale = 1.f / 3.f;
|
||||
int y, x, i, j;
|
||||
|
||||
for (y = 0; y < noy; y++) {
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
for (x = 0; x < nox; x++) {
|
||||
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
|
||||
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
|
||||
|
@ -421,7 +426,8 @@ static void filter_plane3d2(FFTdnoizContext *s, int plane, float *pbuffer, float
|
|||
}
|
||||
}
|
||||
|
||||
static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
|
||||
static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
const int block = p->b;
|
||||
|
@ -429,11 +435,13 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
|
|||
const int noy = p->noy;
|
||||
const int buffer_linesize = p->buffer_linesize / sizeof(float);
|
||||
const float sigma = s->sigma * s->sigma * block * block;
|
||||
const int slice_start = (noy * jobnr) / nb_jobs;
|
||||
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
|
||||
const float limit = 1.f - s->amount;
|
||||
float *cbuffer = p->buffer[CURRENT];
|
||||
int y, x, i, j;
|
||||
|
||||
for (y = 0; y < noy; y++) {
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
for (x = 0; x < nox; x++) {
|
||||
float *cbuff = cbuffer + buffer_linesize * y * block + x * block * 2;
|
||||
float *pbuff = pbuffer + buffer_linesize * y * block + x * block * 2;
|
||||
|
@ -473,7 +481,8 @@ static void filter_plane3d1(FFTdnoizContext *s, int plane, float *pbuffer)
|
|||
}
|
||||
}
|
||||
|
||||
static void filter_plane2d(FFTdnoizContext *s, int plane)
|
||||
static void filter_plane2d(FFTdnoizContext *s, int plane,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
const int block = p->b;
|
||||
|
@ -482,15 +491,16 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
|
|||
const int buffer_linesize = p->buffer_linesize / 4;
|
||||
const float sigma = s->sigma * s->sigma * block * block;
|
||||
const float limit = 1.f - s->amount;
|
||||
const int slice_start = (noy * jobnr) / nb_jobs;
|
||||
const int slice_end = (noy * (jobnr+1)) / nb_jobs;
|
||||
float *buffer = p->buffer[CURRENT];
|
||||
int y, x, i, j;
|
||||
|
||||
for (y = 0; y < noy; y++) {
|
||||
for (x = 0; x < nox; x++) {
|
||||
for (int y = slice_start; y < slice_end; y++) {
|
||||
for (int x = 0; x < nox; x++) {
|
||||
float *buff = buffer + buffer_linesize * y * block + x * block * 2;
|
||||
|
||||
for (i = 0; i < block; i++) {
|
||||
for (j = 0; j < block; j++) {
|
||||
for (int i = 0; i < block; i++) {
|
||||
for (int j = 0; j < block; j++) {
|
||||
float factor, power, re, im;
|
||||
|
||||
re = buff[j * 2 ];
|
||||
|
@ -507,6 +517,79 @@ static void filter_plane2d(FFTdnoizContext *s, int plane)
|
|||
}
|
||||
}
|
||||
|
||||
static int import_pass(AVFilterContext *ctx, void *arg,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
FFTdnoizContext *s = ctx->priv;
|
||||
|
||||
for (int plane = 0; plane < s->nb_planes; plane++) {
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
|
||||
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
|
||||
continue;
|
||||
|
||||
if (s->next) {
|
||||
import_plane(s, s->next->data[plane], s->next->linesize[plane],
|
||||
p->buffer[NEXT], p->buffer_linesize, plane,
|
||||
jobnr, nb_jobs);
|
||||
}
|
||||
|
||||
if (s->prev) {
|
||||
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
|
||||
p->buffer[PREV], p->buffer_linesize, plane,
|
||||
jobnr, nb_jobs);
|
||||
}
|
||||
|
||||
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
|
||||
p->buffer[CURRENT], p->buffer_linesize, plane,
|
||||
jobnr, nb_jobs);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int filter_pass(AVFilterContext *ctx, void *arg,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
FFTdnoizContext *s = ctx->priv;
|
||||
|
||||
for (int plane = 0; plane < s->nb_planes; plane++) {
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
|
||||
if (!((1 << plane) & s->planesf) || ctx->is_disabled)
|
||||
continue;
|
||||
|
||||
if (s->next && s->prev) {
|
||||
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT], jobnr, nb_jobs);
|
||||
} else if (s->next) {
|
||||
filter_plane3d1(s, plane, p->buffer[NEXT], jobnr, nb_jobs);
|
||||
} else if (s->prev) {
|
||||
filter_plane3d1(s, plane, p->buffer[PREV], jobnr, nb_jobs);
|
||||
} else {
|
||||
filter_plane2d(s, plane, jobnr, nb_jobs);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int export_pass(AVFilterContext *ctx, void *arg,
|
||||
int jobnr, int nb_jobs)
|
||||
{
|
||||
FFTdnoizContext *s = ctx->priv;
|
||||
AVFrame *out = arg;
|
||||
|
||||
for (int plane = 0; plane < s->nb_planes; plane++) {
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
|
||||
export_plane(s, out->data[plane], out->linesize[plane],
|
||||
p->buffer[CURRENT], p->buffer_linesize, plane,
|
||||
jobnr, nb_jobs);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
||||
{
|
||||
AVFilterContext *ctx = inlink->dst;
|
||||
|
@ -559,6 +642,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||
av_frame_copy_props(out, s->cur);
|
||||
}
|
||||
|
||||
ff_filter_execute(ctx, import_pass, NULL, NULL,
|
||||
FFMIN(s->planes[0].noy, s->nb_threads));
|
||||
|
||||
ff_filter_execute(ctx, filter_pass, NULL, NULL,
|
||||
FFMIN(s->planes[0].noy, s->nb_threads));
|
||||
|
||||
ff_filter_execute(ctx, export_pass, out, NULL,
|
||||
FFMIN(s->planes[0].noy, s->nb_threads));
|
||||
|
||||
for (plane = 0; plane < s->nb_planes; plane++) {
|
||||
PlaneContext *p = &s->planes[plane];
|
||||
|
||||
|
@ -569,32 +661,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
|
|||
p->planewidth, p->planeheight);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (s->next) {
|
||||
import_plane(s, s->next->data[plane], s->next->linesize[plane],
|
||||
p->buffer[NEXT], p->buffer_linesize, plane);
|
||||
}
|
||||
|
||||
if (s->prev) {
|
||||
import_plane(s, s->prev->data[plane], s->prev->linesize[plane],
|
||||
p->buffer[PREV], p->buffer_linesize, plane);
|
||||
}
|
||||
|
||||
import_plane(s, s->cur->data[plane], s->cur->linesize[plane],
|
||||
p->buffer[CURRENT], p->buffer_linesize, plane);
|
||||
|
||||
if (s->next && s->prev) {
|
||||
filter_plane3d2(s, plane, p->buffer[PREV], p->buffer[NEXT]);
|
||||
} else if (s->next) {
|
||||
filter_plane3d1(s, plane, p->buffer[NEXT]);
|
||||
} else if (s->prev) {
|
||||
filter_plane3d1(s, plane, p->buffer[PREV]);
|
||||
} else {
|
||||
filter_plane2d(s, plane);
|
||||
}
|
||||
|
||||
export_plane(s, out->data[plane], out->linesize[plane],
|
||||
p->buffer[CURRENT], p->buffer_linesize, plane);
|
||||
}
|
||||
|
||||
if (s->nb_next == 0 && s->nb_prev == 0) {
|
||||
|
@ -644,13 +710,19 @@ static av_cold void uninit(AVFilterContext *ctx)
|
|||
for (i = 0; i < 4; i++) {
|
||||
PlaneContext *p = &s->planes[i];
|
||||
|
||||
av_freep(&p->hdata);
|
||||
av_freep(&p->vdata);
|
||||
for (int j = 0; j < s->nb_threads; j++) {
|
||||
av_freep(&p->hdata[j]);
|
||||
av_freep(&p->vdata[j]);
|
||||
}
|
||||
|
||||
av_freep(&p->buffer[PREV]);
|
||||
av_freep(&p->buffer[CURRENT]);
|
||||
av_freep(&p->buffer[NEXT]);
|
||||
av_fft_end(p->fft);
|
||||
av_fft_end(p->ifft);
|
||||
}
|
||||
|
||||
for (i = 0; i < s->nb_threads; i++) {
|
||||
av_fft_end(s->fft[i]);
|
||||
av_fft_end(s->ifft[i]);
|
||||
}
|
||||
|
||||
av_frame_free(&s->prev);
|
||||
|
@ -679,11 +751,11 @@ const AVFilter ff_vf_fftdnoiz = {
|
|||
.name = "fftdnoiz",
|
||||
.description = NULL_IF_CONFIG_SMALL("Denoise frames using 3D FFT."),
|
||||
.priv_size = sizeof(FFTdnoizContext),
|
||||
.init = init,
|
||||
.uninit = uninit,
|
||||
FILTER_INPUTS(fftdnoiz_inputs),
|
||||
FILTER_OUTPUTS(fftdnoiz_outputs),
|
||||
FILTER_PIXFMTS_ARRAY(pix_fmts),
|
||||
.priv_class = &fftdnoiz_class,
|
||||
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
|
||||
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
|
||||
AVFILTER_FLAG_SLICE_THREADS,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue