mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-12 10:29:39 +00:00
avfilter: add anlmdn filter x86 SIMD optimizations
This commit is contained in:
parent
02b6d1dd63
commit
dcae5ba322
@ -27,6 +27,8 @@
|
||||
#include "audio.h"
|
||||
#include "formats.h"
|
||||
|
||||
#include "af_anlmdndsp.h"
|
||||
|
||||
#define SQR(x) ((x) * (x))
|
||||
|
||||
typedef struct AudioNLMeansContext {
|
||||
@ -49,7 +51,7 @@ typedef struct AudioNLMeansContext {
|
||||
|
||||
AVAudioFifo *fifo;
|
||||
|
||||
float (*compute_distance)(const float *f1, const float *f2, int K);
|
||||
AudioNLMDNDSPContext dsp;
|
||||
} AudioNLMeansContext;
|
||||
|
||||
#define OFFSET(x) offsetof(AudioNLMeansContext, x)
|
||||
@ -93,7 +95,7 @@ static int query_formats(AVFilterContext *ctx)
|
||||
return ff_set_common_samplerates(ctx, formats);
|
||||
}
|
||||
|
||||
static float compute_distance_ssd(const float *f1, const float *f2, int K)
|
||||
static float compute_distance_ssd_c(const float *f1, const float *f2, ptrdiff_t K)
|
||||
{
|
||||
float distance = 0.;
|
||||
|
||||
@ -103,6 +105,25 @@ static float compute_distance_ssd(const float *f1, const float *f2, int K)
|
||||
return distance;
|
||||
}
|
||||
|
||||
/**
 * C reference implementation of the incremental distance-cache update.
 *
 * For each v in [0, S), with j = jj + v, the squared difference between
 * f[i - K - 1] and f[j - K - 1] is subtracted from cache[v] and the squared
 * difference between f[i + K] and f[j + K] is added to it.
 *
 * @param cache  S accumulated distances, updated in place
 * @param f      sample buffer; read at i - K - 1, i + K, j - K - 1 and j + K
 * @param S      number of cache entries to update (nothing is done if S <= 0)
 * @param K      offset used on both sides of i and j
 * @param i      index of the current sample in f
 * @param jj     index in f of the first candidate sample
 */
static void compute_cache_c(float *cache, const float *f,
                            ptrdiff_t S, ptrdiff_t K,
                            ptrdiff_t i, ptrdiff_t jj)
{
    /* Indices stay ptrdiff_t so jj and S are never narrowed to int. */
    for (ptrdiff_t v = 0; v < S; v++) {
        const ptrdiff_t j  = jj + v;
        const float removed = f[i - K - 1] - f[j - K - 1];
        const float added   = f[i + K]     - f[j + K];

        cache[v] += -(removed * removed) + added * added;
    }
}
|
||||
|
||||
/**
 * Populate the anlmdn DSP function table.
 *
 * Both entries are first set to the C implementations defined in this file;
 * when ARCH_X86 is non-zero, ff_anlmdn_init_x86() is then given the chance
 * to replace them.
 *
 * @param dsp  function table to fill in
 */
void ff_anlmdn_init(AudioNLMDNDSPContext *dsp)
{
    /* C defaults: every pointer is valid before any override runs. */
    dsp->compute_cache        = compute_cache_c;
    dsp->compute_distance_ssd = compute_distance_ssd_c;

    if (ARCH_X86) {
        ff_anlmdn_init_x86(dsp);
    }
}
|
||||
|
||||
static int config_output(AVFilterLink *outlink)
|
||||
{
|
||||
AVFilterContext *ctx = outlink->src;
|
||||
@ -129,7 +150,7 @@ static int config_output(AVFilterLink *outlink)
|
||||
if (!s->fifo)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
s->compute_distance = compute_distance_ssd;
|
||||
ff_anlmdn_init(&s->dsp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -153,17 +174,14 @@ static int filter_channel(AVFilterContext *ctx, void *arg, int ch, int nb_jobs)
|
||||
for (int j = i - S; j <= i + S; j++) {
|
||||
if (i == j)
|
||||
continue;
|
||||
cache[v++] = s->compute_distance(f + i, f + j, K);
|
||||
cache[v++] = s->dsp.compute_distance_ssd(f + i, f + j, K);
|
||||
}
|
||||
} else {
|
||||
for (int j = i - S; j < i; j++, v++)
|
||||
cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]);
|
||||
|
||||
for (int j = i + 1; j <= i + S; j++, v++)
|
||||
cache[v] = cache[v] - SQR(f[i - K - 1] - f[j - K - 1]) + SQR(f[i + K] - f[j + K]);
|
||||
s->dsp.compute_cache(cache, f, S, K, i, i - S);
|
||||
s->dsp.compute_cache(cache + S, f, S, K, i, i + 1);
|
||||
}
|
||||
|
||||
for (int j = 0; j < v; j++) {
|
||||
for (int j = 0; j < 2 * S; j++) {
|
||||
const float distance = cache[j];
|
||||
float w;
|
||||
|
||||
|
40
libavfilter/af_anlmdndsp.h
Normal file
40
libavfilter/af_anlmdndsp.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2019 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_ANLMDNDSP_H
|
||||
#define AVFILTER_ANLMDNDSP_H
|
||||
|
||||
#include "libavutil/common.h"
|
||||
|
||||
#include "audio.h"
|
||||
#include "avfilter.h"
|
||||
#include "formats.h"
|
||||
#include "internal.h"
|
||||
|
||||
/**
 * Table of the DSP routines used by the anlmdn filter.
 *
 * The pointers are filled in by ff_anlmdn_init().
 */
typedef struct AudioNLMDNDSPContext {
    /** Returns a float distance computed from f1 and f2 with parameter K. */
    float (*compute_distance_ssd)(const float *f1, const float *f2,
                                  ptrdiff_t K);

    /** Updates entries of cache from f, given S, K, i and jj. */
    void (*compute_cache)(float *cache, const float *f,
                          ptrdiff_t S, ptrdiff_t K,
                          ptrdiff_t i, ptrdiff_t jj);
} AudioNLMDNDSPContext;
|
||||
|
||||
void ff_anlmdn_init(AudioNLMDNDSPContext *s);
|
||||
void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s);
|
||||
|
||||
#endif /* AVFILTER_ANLMDNDSP_H */
|
@ -1,6 +1,7 @@
|
||||
OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
|
||||
|
||||
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
|
||||
OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o
|
||||
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
||||
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
||||
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
||||
@ -34,6 +35,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
||||
X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
|
||||
|
||||
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
|
||||
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o
|
||||
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
||||
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
||||
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
||||
|
80
libavfilter/x86/af_anlmdn.asm
Normal file
80
libavfilter/x86/af_anlmdn.asm
Normal file
@ -0,0 +1,80 @@
|
||||
;*****************************************************************************
|
||||
;* x86-optimized functions for anlmdn filter
|
||||
;* Copyright (c) 2017 Paul B Mahol
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; float ff_compute_distance_ssd_sse(const float *f1, const float *f2, ptrdiff_t len)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
INIT_XMM sse
cglobal compute_distance_ssd, 3,5,3, f1, f2, len, r, x
    ; Step both pointers back by len floats (len * 4 bytes).
    mov       xq, lenq
    shl       xq, 2
    neg       xq
    add       f1q, xq
    add       f2q, xq

    ; x is the running byte offset into both inputs.
    xor       xq, xq

    ; lenq = (2 * len + 1) * 4: total number of bytes to process.
    shl       lenq, 1
    add       lenq, 1
    shl       lenq, 2

    ; r = bytes left over after the last whole vector.
    mov       rq, lenq
    and       rq, mmsize - 1

    ; m0 accumulates the squared differences.
    xorps     m0, m0

    ; Less than one full vector: do everything in the scalar loop.
    cmp       lenq, mmsize
    jl .loop1

    ; Bound the vector loop to the whole-vector part of the input.
    sub       lenq, rq
ALIGN 16
    .loop0:
        movups    m1, [f1q + xq]
        movups    m2, [f2q + xq]
        subps     m1, m2
        mulps     m1, m1
        addps     m0, m1
        add       xq, mmsize
        cmp       xq, lenq
        jl .loop0

    ; Horizontal add: fold the four lanes of the accumulator into lane 0.
    movhlps   xm1, xm0
    addps     xm0, xm1
    movss     xm1, xm0
    shufps    xm0, xm0, 1
    addss     xm0, xm1

    ; No leftover bytes: done. Otherwise restore the full length for the tail.
    cmp       rq, 0
    je .end
    add       lenq, rq
    .loop1:
        ; Scalar loop: one float per iteration.
        movss     xm1, [f1q + xq]
        subss     xm1, [f2q + xq]
        mulss     xm1, xm1
        addss     xm0, xm1
        add       xq, 4
        cmp       xq, lenq
        jl .loop1
    .end:
%if ARCH_X86_64 == 0
    ; Not x86-64: spill the result to the r0m slot and load it onto the x87 stack.
    movss     r0m, xm0
    fld dword r0m
%endif
    RET
|
35
libavfilter/x86/af_anlmdn_init.c
Normal file
35
libavfilter/x86/af_anlmdn_init.c
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/af_anlmdndsp.h"
|
||||
|
||||
float ff_compute_distance_ssd_sse(const float *f1, const float *f2,
|
||||
ptrdiff_t len);
|
||||
|
||||
/**
 * Install the x86 assembly routines into the anlmdn DSP function table.
 *
 * Only compute_distance_ssd is replaced, and only when EXTERNAL_SSE()
 * accepts the flags reported by av_get_cpu_flags(); every other entry of
 * the table is left as the caller set it.
 *
 * @param s  function table to update
 */
av_cold void ff_anlmdn_init_x86(AudioNLMDNDSPContext *s)
{
    const int flags = av_get_cpu_flags();

    /* Kept as a plain if-block so the symbol reference sits under the check. */
    if (EXTERNAL_SSE(flags))
        s->compute_distance_ssd = ff_compute_distance_ssd_sse;
}
|
Loading…
Reference in New Issue
Block a user