mirror of https://git.ffmpeg.org/ffmpeg.git
avfilter/vf_atadenoise: add x86 SIMD
This commit is contained in:
parent
eb17bf6fd3
commit
295d99b439
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2019 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_ATADENOISE_H
|
||||
#define AVFILTER_ATADENOISE_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/**
 * Table of row-filtering routines used by the atadenoise filter.
 *
 * The generic code installs a C implementation chosen by bit depth and an
 * architecture-specific initializer may then replace it.
 */
typedef struct ATADenoiseDSPContext {
|
||||
/*
 * Filter a single row; invoked once per row from the slice worker.
 *
 * src        - row of the current input frame
 * dst        - row of the output frame
 * srcf       - array of row pointers, one per buffered frame; the SSE4
 *              implementation indexes it from mid outwards in both directions
 * w          - row width as passed by the caller
 * mid, size  - index of the centre entry in srcf and number of entries
 *              (presumably size is odd and mid == size / 2; confirm in caller)
 * thra, thrb - thresholds; in the SSE4 implementation thra bounds the
 *              per-frame absolute difference and thrb the running sum of
 *              absolute differences
 *
 * The pointers are uint8_t even for the implementation installed for
 * depths above 8 bits.
 */
void (*filter_row)(const uint8_t *src, uint8_t *dst,
|
||||
const uint8_t **srcf,
|
||||
int w, int mid, int size,
|
||||
int thra, int thrb);
|
||||
} ATADenoiseDSPContext;
|
||||
|
||||
/**
 * Replace entries of dsp with x86-optimized versions where the running CPU
 * supports them.
 *
 * @param dsp   context whose function pointers may be overwritten
 * @param depth component bit depth of the input
 */
void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth);
|
||||
|
||||
#endif /* AVFILTER_ATADENOISE_H */
|
|
@ -33,6 +33,7 @@
|
|||
#define FF_BUFQUEUE_SIZE 129
|
||||
#include "bufferqueue.h"
|
||||
|
||||
#include "atadenoise.h"
|
||||
#include "formats.h"
|
||||
#include "internal.h"
|
||||
#include "video.h"
|
||||
|
@ -57,10 +58,8 @@ typedef struct ATADenoiseContext {
|
|||
int available;
|
||||
|
||||
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
|
||||
void (*filter_row)(const uint8_t *src, uint8_t *dst,
|
||||
const uint8_t *srcf[SIZE],
|
||||
int w, int mid, int size,
|
||||
int thra, int thrb);
|
||||
|
||||
ATADenoiseDSPContext dsp;
|
||||
} ATADenoiseContext;
|
||||
|
||||
#define OFFSET(x) offsetof(ATADenoiseContext, x)
|
||||
|
@ -209,7 +208,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
|
|||
srcf[i] = data[i] + slice_start * linesize[i];
|
||||
|
||||
for (y = slice_start; y < slice_end; y++) {
|
||||
s->filter_row(src, dst, srcf, w, mid, size, thra, thrb);
|
||||
s->dsp.filter_row(src, dst, srcf, w, mid, size, thra, thrb);
|
||||
|
||||
dst += out->linesize[p];
|
||||
src += in->linesize[p];
|
||||
|
@ -239,9 +238,9 @@ static int config_input(AVFilterLink *inlink)
|
|||
depth = desc->comp[0].depth;
|
||||
s->filter_slice = filter_slice;
|
||||
if (depth == 8)
|
||||
s->filter_row = filter_row8;
|
||||
s->dsp.filter_row = filter_row8;
|
||||
else
|
||||
s->filter_row = filter_row16;
|
||||
s->dsp.filter_row = filter_row16;
|
||||
|
||||
s->thra[0] = s->fthra[0] * (1 << depth) - 1;
|
||||
s->thra[1] = s->fthra[1] * (1 << depth) - 1;
|
||||
|
@ -250,6 +249,9 @@ static int config_input(AVFilterLink *inlink)
|
|||
s->thrb[1] = s->fthrb[1] * (1 << depth) - 1;
|
||||
s->thrb[2] = s->fthrb[2] * (1 << depth) - 1;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_atadenoise_init_x86(&s->dsp, depth);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad_init.o
|
|||
|
||||
OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir_init.o
|
||||
OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn_init.o
|
||||
OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise_init.o
|
||||
OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
|
||||
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
|
||||
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
|
||||
|
@ -39,6 +40,7 @@ X86ASM-OBJS-$(CONFIG_SCENE_SAD) += x86/scene_sad.o
|
|||
|
||||
X86ASM-OBJS-$(CONFIG_AFIR_FILTER) += x86/af_afir.o
|
||||
X86ASM-OBJS-$(CONFIG_ANLMDN_FILTER) += x86/af_anlmdn.o
|
||||
X86ASM-OBJS-$(CONFIG_ATADENOISE_FILTER) += x86/vf_atadenoise.o
|
||||
X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
|
||||
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
|
||||
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
;*****************************************************************************
|
||||
;* x86-optimized functions for atadenoise filter
|
||||
;*
|
||||
;* Copyright (C) 2019 Paul B Mahol
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%if ARCH_X86_64
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_RODATA
|
||||
; eight words of 1: initial per-lane sample count (the centre pixel)
pw_one: times 8 dw 1
|
||||
; eight words of 0xFFFF: all-ones mask, used both as the initial
; "lane still active" mask and as the XOR operand for a bitwise NOT
pw_ones: times 8 dw 65535
|
||||
|
||||
SECTION .text
|
||||
|
||||
;------------------------------------------------------------------------------
|
||||
; void ff_atadenoise_filter_row8(const uint8_t *src, uint8_t *dst,
|
||||
; const uint8_t **srcf,
|
||||
; int w, int mid, int size,
|
||||
; int thra, int thrb)
|
||||
;------------------------------------------------------------------------------
|
||||
|
||||
; SSE4 row filter for 8-bit samples, 8 pixels per outer iteration.
;
; For every pixel the centre sample (from src) is averaged with samples at
; the same position in srcf[mid-1], srcf[mid+1], srcf[mid-2], srcf[mid+2], ...
; A lane stops accumulating as soon as, on either side, the absolute
; difference to the centre exceeds thra or the running sum of absolute
; differences on that side exceeds thrb. The result is
; (sum + count/2) / count, computed in single-precision float.
;
; Register roles:
;   m0  centre pixels (words)        m2  zero
;   m3  running |diff| sum, i side   m11 running |diff| sum, j side
;   m4  thra broadcast               m5  thrb broadcast
;   m7  sample sum                   m8  sample count
;   m10 all-ones (for NOT)           m12 per-lane "still active" mask
INIT_XMM sse4
|
||||
; x86inc cglobal: 8 arguments, 10 GPRs, 13 XMM registers
cglobal atadenoise_filter_row8, 8,10,13, src, dst, srcf, w, mid, size, i, j, srcfx, x
|
||||
; widen the int arguments to full register width where required
movsxdifnidn wq, wd
|
||||
movsxdifnidn midq, midd
|
||||
movsxdifnidn sizeq, sized
|
||||
; point src/dst at the end of the row and walk x from -w up to 0
add srcq, wq
|
||||
add dstq, wq
|
||||
mov xq, wq
|
||||
; size - 1 is the bound the inner loop compares i against
dec sizeq
|
||||
neg xq
|
||||
; thra / thrb are fetched via the r6m / r7m argument operands because
; registers 6 and 7 are used under the names i and j
movd m4, r6m
|
||||
SPLATW m4, m4
|
||||
movd m5, r7m
|
||||
SPLATW m5, m5
|
||||
pxor m2, m2
|
||||
mova m10, [pw_ones]
|
||||
|
||||
; ---- outer loop: one group of 8 pixels ----
.loop:
|
||||
mov iq, midq
|
||||
mov jq, midq
|
||||
; reset both running |diff| sums
pxor m3, m3
|
||||
pxor m11, m11
|
||||
; load 16 bytes, keep the low 8 widened to words
movu m0, [srcq + xq]
|
||||
punpcklbw m0, m2
|
||||
; sum starts with the centre pixel, count starts at 1, all lanes active
mova m7, m0
|
||||
mova m8, [pw_one]
|
||||
mova m12, [pw_ones]
|
||||
|
||||
; ---- inner loop: step one frame outwards on both sides ----
.loop0:
|
||||
inc iq
|
||||
dec jq
|
||||
|
||||
; j side: row pointer of frame j (8-byte pointers), biased by w so that
; adding the negative x yields the current position
mov srcfxq, [srcfq + jq * 8]
|
||||
add srcfxq, wq
|
||||
|
||||
movu m1, [srcfxq + xq]
|
||||
punpcklbw m1, m2
|
||||
; keep an unmodified copy of the samples for the sum
mova m9, m1
|
||||
; m1 = |sample - centre|
psubw m1, m0
|
||||
pabsw m1, m1
|
||||
paddw m11, m1
|
||||
; m1 = (|diff| > thra)
pcmpgtw m1, m4
|
||||
; m6 = (running sum > thrb)
mova m6, m11
|
||||
pcmpgtw m6, m5
|
||||
por m6, m1
|
||||
; invert: m6 = lanes where neither threshold was exceeded
pxor m6, m10
|
||||
; a lane once cleared stays cleared for both sides
pand m12, m6
|
||||
pand m9, m12
|
||||
paddw m7, m9
|
||||
; 0xFFFF >> 15 == 1: add one to the count of every active lane
mova m6, m12
|
||||
psrlw m6, 15
|
||||
paddw m8, m6
|
||||
|
||||
; i side: same steps with frame i and running sum m3
mov srcfxq, [srcfq + iq * 8]
|
||||
add srcfxq, wq
|
||||
|
||||
movu m1, [srcfxq + xq]
|
||||
punpcklbw m1, m2
|
||||
mova m9, m1
|
||||
psubw m1, m0
|
||||
pabsw m1, m1
|
||||
paddw m3, m1
|
||||
pcmpgtw m1, m4
|
||||
mova m6, m3
|
||||
pcmpgtw m6, m5
|
||||
por m6, m1
|
||||
pxor m6, m10
|
||||
pand m12, m6
|
||||
pand m9, m12
|
||||
paddw m7, m9
|
||||
mova m6, m12
|
||||
psrlw m6, 15
|
||||
paddw m8, m6
|
||||
|
||||
; stop early once no lane is active any more
ptest m12, m12
|
||||
jz .finish
|
||||
|
||||
; continue while i < size - 1
cmp iq, sizeq
|
||||
jl .loop0
|
||||
|
||||
; ---- divide sum by count with rounding ----
.finish:
|
||||
; sum += count / 2 so the truncating conversion below rounds to nearest
mova m9, m8
|
||||
psrlw m9, 1
|
||||
paddw m7, m9
|
||||
|
||||
; keep copies for the high four lanes
mova m1, m7
|
||||
mova m6, m8
|
||||
|
||||
; low four lanes: words -> dwords -> float, divide, truncate, pack to bytes
punpcklwd m7, m2
|
||||
punpcklwd m8, m2
|
||||
cvtdq2ps m7, m7
|
||||
cvtdq2ps m8, m8
|
||||
divps m7, m8
|
||||
cvttps2dq m7, m7
|
||||
packssdw m7, m7
|
||||
packuswb m7, m7
|
||||
|
||||
movd [dstq + xq], m7
|
||||
|
||||
; high four lanes, same sequence
punpckhwd m1, m2
|
||||
punpckhwd m6, m2
|
||||
cvtdq2ps m1, m1
|
||||
cvtdq2ps m6, m6
|
||||
divps m1, m6
|
||||
cvttps2dq m1, m1
|
||||
packssdw m1, m1
|
||||
packuswb m1, m1
|
||||
|
||||
movd [dstq + xq + 4], m1
|
||||
|
||||
; advance by 8 pixels (mmsize/2 with 16-byte XMM registers).
; NOTE(review): each iteration loads 16 bytes and stores 8, so when w is
; not a multiple of 8 the last iteration reads and writes past w; this
; presumably relies on padded/aligned row buffers -- confirm with callers.
add xq, mmsize/2
|
||||
jl .loop
|
||||
RET
|
||||
|
||||
%endif
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Paul B Mahol
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/atadenoise.h"
|
||||
|
||||
/* Row filter for 8-bit samples implemented in x86 assembly (SSE4); its
 * signature matches ATADenoiseDSPContext.filter_row so it can be assigned
 * to that pointer directly. */
void ff_atadenoise_filter_row8_sse4(const uint8_t *src, uint8_t *dst,
|
||||
const uint8_t **srcf,
|
||||
int w, int mid, int size,
|
||||
int thra, int thrb);
|
||||
|
||||
/**
 * Install x86-optimized row filters into dsp when the CPU supports them.
 *
 * dsp->filter_row is left untouched unless all conditions below hold, so the
 * caller must have set a working default beforehand.
 *
 * @param dsp   context whose filter_row pointer may be overwritten
 * @param depth component bit depth; the SSE4 routine is only used for
 *              depth <= 8
 */
av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth)
|
||||
{
|
||||
/* runtime CPU feature flags */
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
/* The assembly routine is assembled only for x86-64 (its source is wrapped
 * in %if ARCH_X86_64), hence the compile-time ARCH_X86_64 test in addition
 * to the runtime SSE4 check. */
if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8) {
|
||||
dsp->filter_row = ff_atadenoise_filter_row8_sse4;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue