mirror of https://git.ffmpeg.org/ffmpeg.git
avfilter/vf_atadenoise: add x86 SIMD
This commit is contained in:
parent
eb17bf6fd3
commit
295d99b439
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2019 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AVFILTER_ATADENOISE_H
|
||||||
|
#define AVFILTER_ATADENOISE_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/**
 * Table of row-filtering routines used by the atadenoise filter.
 *
 * The filter fills this in with a C implementation and then lets
 * ff_atadenoise_init_x86() replace entries with SIMD versions.
 */
typedef struct ATADenoiseDSPContext {
|
||||||
|
/**
 * Filter one row of samples.
 *
 * @param src   row being filtered
 * @param dst   output row
 * @param srcf  array of row pointers that is indexed around @p mid
 * @param w     row width (presumably in samples; confirm against callers)
 * @param mid   index into @p srcf at which the outward walk starts
 * @param size  bound on the indices used with @p srcf (presumably its
 *              entry count; confirm against callers)
 * @param thra  threshold on the per-sample absolute difference
 * @param thrb  threshold on the accumulated absolute difference
 */
void (*filter_row)(const uint8_t *src, uint8_t *dst,
|
||||||
|
const uint8_t **srcf,
|
||||||
|
int w, int mid, int size,
|
||||||
|
int thra, int thrb);
|
||||||
|
} ATADenoiseDSPContext;
|
||||||
|
|
||||||
|
void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth);
|
||||||
|
|
||||||
|
#endif /* AVFILTER_ATADENOISE_H */
|
|
@ -33,6 +33,7 @@
|
||||||
#define FF_BUFQUEUE_SIZE 129
|
#define FF_BUFQUEUE_SIZE 129
|
||||||
#include "bufferqueue.h"
|
#include "bufferqueue.h"
|
||||||
|
|
||||||
|
#include "atadenoise.h"
|
||||||
#include "formats.h"
|
#include "formats.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
#include "video.h"
|
#include "video.h"
|
||||||
|
@ -57,10 +58,8 @@ typedef struct ATADenoiseContext {
|
||||||
int available;
|
int available;
|
||||||
|
|
||||||
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
||||||
void (*filter_row)(const uint8_t *src, uint8_t *dst,
|
|
||||||
const uint8_t *srcf[SIZE],
|
ATADenoiseDSPContext dsp;
|
||||||
int w, int mid, int size,
|
|
||||||
int thra, int thrb);
|
|
||||||
} ATADenoiseContext;
|
} ATADenoiseContext;
|
||||||
|
|
||||||
#define OFFSET(x) offsetof(ATADenoiseContext, x)
|
#define OFFSET(x) offsetof(ATADenoiseContext, x)
|
||||||
|
@ -209,7 +208,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
||||||
srcf[i] = data[i] + slice_start * linesize[i];
|
srcf[i] = data[i] + slice_start * linesize[i];
|
||||||
|
|
||||||
for (y = slice_start; y < slice_end; y++) {
|
for (y = slice_start; y < slice_end; y++) {
|
||||||
s->filter_row(src, dst, srcf, w, mid, size, thra, thrb);
|
s->dsp.filter_row(src, dst, srcf, w, mid, size, thra, thrb);
|
||||||
|
|
||||||
dst += out->linesize[p];
|
dst += out->linesize[p];
|
||||||
src += in->linesize[p];
|
src += in->linesize[p];
|
||||||
|
@ -239,9 +238,9 @@ static int config_input(AVFilterLink *inlink)
|
||||||
depth = desc->comp[0].depth;
|
depth = desc->comp[0].depth;
|
||||||
s->filter_slice = filter_slice;
|
s->filter_slice = filter_slice;
|
||||||
if (depth == 8)
|
if (depth == 8)
|
||||||
s->filter_row = filter_row8;
|
s->dsp.filter_row = filter_row8;
|
||||||
else
|
else
|
||||||
s->filter_row = filter_row16;
|
s->dsp.filter_row = filter_row16;
|
||||||
|
|
||||||
s->thra[0] = s->fthra[0] * (1 << depth) - 1;
|
s->thra[0] = s->fthra[0] * (1 << depth) - 1;
|
||||||
s->thra[1] = s->fthra[1] * (1 << depth) - 1;
|
s->thra[1] = s->fthra[1] * (1 << depth) - 1;
|
||||||
|
@ -250,6 +249,9 @@ static int config_input(AVFilterLink *inlink)
|
||||||
s->thrb[1] = s->fthrb[1] * (1 << depth) - 1;
|
s->thrb[1] = s->fthrb[1] * (1 << depth) - 1;
|
||||||
s->thrb[2] = s->fthrb[2] * (1 << depth) - 1;
|
s->thrb[2] = s->fthrb[2] * (1 << depth) - 1;
|
||||||
|
|
||||||
|
if (ARCH_X86)
|
||||||
|
ff_atadenoise_init_x86(&s->dsp, depth);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
|
||||||
|
|
||||||
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
|
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
|
||||||
OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o
|
OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o
|
||||||
|
OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o
|
||||||
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
||||||
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
||||||
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
||||||
|
@ -39,6 +40,7 @@ X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
|
||||||
|
|
||||||
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
|
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
|
||||||
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o
|
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o
|
||||||
|
X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o
|
||||||
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
||||||
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
||||||
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
||||||
|
|
|
@ -0,0 +1,154 @@
|
||||||
|
;*****************************************************************************
|
||||||
|
;* x86-optimized functions for atadenoise filter
|
||||||
|
;*
|
||||||
|
;* Copyright (C) 2019 Paul B Mahol
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
pw_one: times 8 dw 1
|
||||||
|
pw_ones: times 8 dw 65535
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
; void ff_atadenoise_filter_row8(const uint8_t *src, uint8_t *dst,
|
||||||
|
; const uint8_t **srcf,
|
||||||
|
; int w, int mid, int size,
|
||||||
|
; int thra, int thrb)
|
||||||
|
;------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
INIT_XMM sse4
|
||||||
|
cglobal atadenoise_filter_row8, 8,10,13, src, dst, srcf, w, mid, size, i, j, srcfx, x
|
||||||
|
movsxdifnidn wq, wd
|
||||||
|
movsxdifnidn midq, midd
|
||||||
|
movsxdifnidn sizeq, sized
|
||||||
|
add srcq, wq
|
||||||
|
add dstq, wq
|
||||||
|
mov xq, wq
|
||||||
|
dec sizeq
|
||||||
|
neg xq
|
||||||
|
movd m4, r6m
|
||||||
|
SPLATW m4, m4
|
||||||
|
movd m5, r7m
|
||||||
|
SPLATW m5, m5
|
||||||
|
pxor m2, m2
|
||||||
|
mova m10, [pw_ones]
|
||||||
|
|
||||||
|
.loop:
|
||||||
|
mov iq, midq
|
||||||
|
mov jq, midq
|
||||||
|
pxor m3, m3
|
||||||
|
pxor m11, m11
|
||||||
|
movu m0, [srcq + xq]
|
||||||
|
punpcklbw m0, m2
|
||||||
|
mova m7, m0
|
||||||
|
mova m8, [pw_one]
|
||||||
|
mova m12, [pw_ones]
|
||||||
|
|
||||||
|
.loop0:
|
||||||
|
inc iq
|
||||||
|
dec jq
|
||||||
|
|
||||||
|
mov srcfxq, [srcfq + jq * 8]
|
||||||
|
add srcfxq, wq
|
||||||
|
|
||||||
|
movu m1, [srcfxq + xq]
|
||||||
|
punpcklbw m1, m2
|
||||||
|
mova m9, m1
|
||||||
|
psubw m1, m0
|
||||||
|
pabsw m1, m1
|
||||||
|
paddw m11, m1
|
||||||
|
pcmpgtw m1, m4
|
||||||
|
mova m6, m11
|
||||||
|
pcmpgtw m6, m5
|
||||||
|
por m6, m1
|
||||||
|
pxor m6, m10
|
||||||
|
pand m12, m6
|
||||||
|
pand m9, m12
|
||||||
|
paddw m7, m9
|
||||||
|
mova m6, m12
|
||||||
|
psrlw m6, 15
|
||||||
|
paddw m8, m6
|
||||||
|
|
||||||
|
mov srcfxq, [srcfq + iq * 8]
|
||||||
|
add srcfxq, wq
|
||||||
|
|
||||||
|
movu m1, [srcfxq + xq]
|
||||||
|
punpcklbw m1, m2
|
||||||
|
mova m9, m1
|
||||||
|
psubw m1, m0
|
||||||
|
pabsw m1, m1
|
||||||
|
paddw m3, m1
|
||||||
|
pcmpgtw m1, m4
|
||||||
|
mova m6, m3
|
||||||
|
pcmpgtw m6, m5
|
||||||
|
por m6, m1
|
||||||
|
pxor m6, m10
|
||||||
|
pand m12, m6
|
||||||
|
pand m9, m12
|
||||||
|
paddw m7, m9
|
||||||
|
mova m6, m12
|
||||||
|
psrlw m6, 15
|
||||||
|
paddw m8, m6
|
||||||
|
|
||||||
|
ptest m12, m12
|
||||||
|
jz .finish
|
||||||
|
|
||||||
|
cmp iq, sizeq
|
||||||
|
jl .loop0
|
||||||
|
|
||||||
|
.finish:
|
||||||
|
mova m9, m8
|
||||||
|
psrlw m9, 1
|
||||||
|
paddw m7, m9
|
||||||
|
|
||||||
|
mova m1, m7
|
||||||
|
mova m6, m8
|
||||||
|
|
||||||
|
punpcklwd m7, m2
|
||||||
|
punpcklwd m8, m2
|
||||||
|
cvtdq2ps m7, m7
|
||||||
|
cvtdq2ps m8, m8
|
||||||
|
divps m7, m8
|
||||||
|
cvttps2dq m7, m7
|
||||||
|
packssdw m7, m7
|
||||||
|
packuswb m7, m7
|
||||||
|
|
||||||
|
movd [dstq + xq], m7
|
||||||
|
|
||||||
|
punpckhwd m1, m2
|
||||||
|
punpckhwd m6, m2
|
||||||
|
cvtdq2ps m1, m1
|
||||||
|
cvtdq2ps m6, m6
|
||||||
|
divps m1, m6
|
||||||
|
cvttps2dq m1, m1
|
||||||
|
packssdw m1, m1
|
||||||
|
packuswb m1, m1
|
||||||
|
|
||||||
|
movd [dstq + xq + 4], m1
|
||||||
|
|
||||||
|
add xq, mmsize/2
|
||||||
|
jl .loop
|
||||||
|
RET
|
||||||
|
|
||||||
|
%endif
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2019 Paul B Mahol
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/mem.h"
|
||||||
|
#include "libavutil/x86/asm.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavfilter/atadenoise.h"
|
||||||
|
|
||||||
|
void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst,
|
||||||
|
const uint8_t **srcf,
|
||||||
|
int w, int mid, int size,
|
||||||
|
int thra, int thrb);
|
||||||
|
|
||||||
|
/**
 * Install x86 SIMD implementations into an atadenoise DSP table.
 *
 * Leaves @p dsp untouched unless every condition checked below holds,
 * so the caller must have set a fallback in dsp->filter_row beforehand.
 *
 * @param dsp   function table to update
 * @param depth bit depth of the samples being filtered
 */
av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
/* The SSE4 routine is only installed for x86-64 builds, CPUs that
 * report SSE4, and depths of at most 8 bits. */
if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8) {
|
||||||
|
dsp->filter_row = ff_atadenoise_filter_row8_sse4;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue