diff --git a/libavfilter/vf_noise.c b/libavfilter/vf_noise.c index 6218ed01af..4acad8a98b 100644 --- a/libavfilter/vf_noise.c +++ b/libavfilter/vf_noise.c @@ -29,43 +29,12 @@ #include "libavutil/lfg.h" #include "libavutil/parseutils.h" #include "libavutil/pixdesc.h" -#include "libavutil/x86/asm.h" #include "avfilter.h" #include "formats.h" #include "internal.h" +#include "vf_noise.h" #include "video.h" -#define MAX_NOISE 5120 -#define MAX_SHIFT 1024 -#define MAX_RES (MAX_NOISE-MAX_SHIFT) - -#define NOISE_UNIFORM 1 -#define NOISE_TEMPORAL 2 -#define NOISE_AVERAGED 8 -#define NOISE_PATTERN 16 - -typedef struct { - int strength; - unsigned flags; - AVLFG lfg; - int seed; - int8_t *noise; - int8_t *prev_shift[MAX_RES][3]; - int rand_shift[MAX_RES]; - int rand_shift_init; -} FilterParams; - -typedef struct { - const AVClass *class; - int nb_planes; - int bytewidth[4]; - int height[4]; - FilterParams all; - FilterParams param[4]; - void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift); - void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift); -} NoiseContext; - typedef struct ThreadData { AVFrame *in, *out; } ThreadData; @@ -193,8 +162,8 @@ static int config_input(AVFilterLink *inlink) return 0; } -static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, - int len, int shift) +void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, + int len, int shift) { int i; @@ -206,70 +175,8 @@ static inline void line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t * } } -#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t" - -static void line_noise_mmx(uint8_t *dst, const uint8_t *src, - const int8_t *noise, int len, int shift) -{ -#if HAVE_MMX_INLINE - x86_reg mmx_len= len&(~7); - noise+=shift; - - __asm__ volatile( - "mov %3, %%"REG_a" \n\t" - "pcmpeqb %%mm7, %%mm7 \n\t" - "psllw $15, %%mm7 \n\t" - "packsswb %%mm7, %%mm7 \n\t" - ASMALIGN(4) - "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "pxor %%mm7, %%mm0 \n\t" - "paddsb %%mm1, %%mm0 \n\t" - "pxor %%mm7, %%mm0 \n\t" - "movq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a - ); - if (mmx_len!=len) - line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); -#endif -} - -static void line_noise_mmxext(uint8_t *dst, const uint8_t *src, - const int8_t *noise, int len, int shift) -{ -#if HAVE_MMXEXT_INLINE - x86_reg mmx_len= len&(~7); - noise+=shift; - - __asm__ volatile( - "mov %3, %%"REG_a" \n\t" - "pcmpeqb %%mm7, %%mm7 \n\t" - "psllw $15, %%mm7 \n\t" - "packsswb %%mm7, %%mm7 \n\t" - ASMALIGN(4) - "1: \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "pxor %%mm7, %%mm0 \n\t" - "paddsb %%mm1, %%mm0 \n\t" - "pxor %%mm7, %%mm0 \n\t" - "movntq %%mm0, (%2, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a - ); - if (mmx_len != len) - line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); -#endif -} - -static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src, - int len, const int8_t * const *shift) +void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src, + int len, const int8_t * const *shift) { int i; const int8_t *src2 = (const int8_t*)src; @@ -280,50 +187,6 @@ static inline void line_noise_avg_c(uint8_t *dst, const uint8_t *src, } } -static inline void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, - int len, const int8_t * const *shift) -{ -#if HAVE_MMX_INLINE && HAVE_6REGS - x86_reg mmx_len= len&(~7); - - __asm__ volatile( - "mov %5, %%"REG_a" \n\t" - ASMALIGN(4) - "1: \n\t" - "movq (%1, %%"REG_a"), %%mm1 \n\t" - "movq (%0, %%"REG_a"), %%mm0 \n\t" - "paddb (%2, %%"REG_a"), %%mm1 \n\t" - "paddb (%3, %%"REG_a"), %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm1, %%mm3 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpckhbw %%mm2, %%mm2 \n\t" - "punpcklbw %%mm1, %%mm1 \n\t" - "punpckhbw %%mm3, %%mm3 \n\t" - "pmulhw %%mm0, %%mm1 \n\t" - "pmulhw %%mm2, %%mm3 \n\t" - "paddw %%mm1, %%mm1 \n\t" - "paddw %%mm3, %%mm3 \n\t" - "paddw %%mm0, %%mm1 \n\t" - "paddw %%mm2, %%mm3 \n\t" - "psrlw $8, %%mm1 \n\t" - "psrlw $8, %%mm3 \n\t" - "packuswb %%mm3, %%mm1 \n\t" - "movq %%mm1, (%4, %%"REG_a") \n\t" - "add $8, %%"REG_a" \n\t" - " js 1b \n\t" - :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len), - "r" (dst+mmx_len), "g" (-mmx_len) - : "%"REG_a - ); - - if (mmx_len != len){ - const int8_t *shift2[3]={shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len}; - line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2); - } -#endif -} - static void noise(uint8_t *dst, const uint8_t *src, int dst_linesize, int src_linesize, int width, int start, int end, NoiseContext *n, int comp) @@ -421,7 +284,6 @@ static av_cold int init(AVFilterContext *ctx) { NoiseContext *n = ctx->priv; int ret, i; - int cpu_flags = av_get_cpu_flags(); for (i = 0; i < 4; i++) { if (n->all.seed >= 0) @@ -439,19 +301,11 @@ static av_cold int init(AVFilterContext *ctx) return ret; } - n->line_noise = line_noise_c; - n->line_noise_avg = line_noise_avg_c; + n->line_noise = ff_line_noise_c; + n->line_noise_avg = ff_line_noise_avg_c; - if (HAVE_MMX_INLINE && - cpu_flags & AV_CPU_FLAG_MMX) { - n->line_noise = line_noise_mmx; -#if HAVE_6REGS - n->line_noise_avg = line_noise_avg_mmx; -#endif - } - if (HAVE_MMXEXT_INLINE && - cpu_flags & AV_CPU_FLAG_MMXEXT) - n->line_noise = line_noise_mmxext; + if (ARCH_X86) + ff_noise_init_x86(n); return 0; } diff --git a/libavfilter/vf_noise.h b/libavfilter/vf_noise.h new file mode 100644 index 0000000000..2207ed961f --- /dev/null +++ b/libavfilter/vf_noise.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2002 Michael Niedermayer + * Copyright (c) 2013 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_NOISE_H +#define AVFILTER_NOISE_H + +#include "libavutil/lfg.h" +#include "avfilter.h" + +#define MAX_NOISE 5120 +#define MAX_SHIFT 1024 +#define MAX_RES (MAX_NOISE-MAX_SHIFT) + +#define NOISE_UNIFORM 1 +#define NOISE_TEMPORAL 2 +#define NOISE_AVERAGED 8 +#define NOISE_PATTERN 16 + +typedef struct { + int strength; + unsigned flags; + AVLFG lfg; + int seed; + int8_t *noise; + int8_t *prev_shift[MAX_RES][3]; + int rand_shift[MAX_RES]; + int rand_shift_init; +} FilterParams; + +typedef struct { + const AVClass *class; + int nb_planes; + int bytewidth[4]; + int height[4]; + FilterParams all; + FilterParams param[4]; + void (*line_noise)(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift); + void (*line_noise_avg)(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift); +} NoiseContext; + +void ff_line_noise_c(uint8_t *dst, const uint8_t *src, const int8_t *noise, int len, int shift); +void ff_line_noise_avg_c(uint8_t *dst, const uint8_t *src, int len, const int8_t * const *shift); + +void ff_noise_init_x86(NoiseContext *n); + +#endif /* AVFILTER_NOISE_H */ diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile index ddb3774862..32145db584 100644 --- a/libavfilter/x86/Makefile +++ b/libavfilter/x86/Makefile @@ -1,6 +1,7 @@ OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o +OBJS-$(CONFIG_NOISE_FILTER) += x86/vf_noise.o OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o diff --git a/libavfilter/x86/vf_noise.c b/libavfilter/x86/vf_noise.c new file mode 100644 index 0000000000..0a86cb084b --- /dev/null +++ b/libavfilter/x86/vf_noise.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2002 Michael Niedermayer + * Copyright (c) 2013 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/x86/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavfilter/vf_noise.h" + +#if HAVE_INLINE_ASM +static void line_noise_mmx(uint8_t *dst, const uint8_t *src, + const int8_t *noise, int len, int shift) +{ + x86_reg mmx_len= len & (~7); + noise += shift; + + __asm__ volatile( + "mov %3, %%"REG_a" \n\t" + "pcmpeqb %%mm7, %%mm7 \n\t" + "psllw $15, %%mm7 \n\t" + "packsswb %%mm7, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "pxor %%mm7, %%mm0 \n\t" + "paddsb %%mm1, %%mm0 \n\t" + "pxor %%mm7, %%mm0 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) + : "%"REG_a + ); + if (mmx_len != len) + ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); +} + +#if HAVE_6REGS +static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src, + int len, const int8_t * const *shift) +{ + x86_reg mmx_len = len & (~7); + + __asm__ volatile( + "mov %5, %%"REG_a" \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "paddb (%2, %%"REG_a"), %%mm1 \n\t" + "paddb (%3, %%"REG_a"), %%mm1 \n\t" + "movq %%mm0, %%mm2 \n\t" + "movq %%mm1, %%mm3 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpckhbw %%mm2, %%mm2 \n\t" + "punpcklbw %%mm1, %%mm1 \n\t" + "punpckhbw %%mm3, %%mm3 \n\t" + "pmulhw %%mm0, %%mm1 \n\t" + "pmulhw %%mm2, %%mm3 \n\t" + "paddw %%mm1, %%mm1 \n\t" + "paddw %%mm3, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "psrlw $8, %%mm1 \n\t" + "psrlw $8, %%mm3 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + "movq %%mm1, (%4, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len), + "r" (dst+mmx_len), "g" (-mmx_len) + : "%"REG_a + ); + + if (mmx_len != len){ + const int8_t *shift2[3] = { shift[0]+mmx_len, shift[1]+mmx_len, shift[2]+mmx_len }; + ff_line_noise_avg_c(dst+mmx_len, src+mmx_len, len-mmx_len, shift2); + } +} +#endif /* HAVE_6REGS */ + +static void line_noise_mmxext(uint8_t *dst, const uint8_t *src, + const int8_t *noise, int len, int shift) +{ + x86_reg mmx_len = len & (~7); + noise += shift; + + __asm__ volatile( + "mov %3, %%"REG_a" \n\t" + "pcmpeqb %%mm7, %%mm7 \n\t" + "psllw $15, %%mm7 \n\t" + "packsswb %%mm7, %%mm7 \n\t" + ".p2align 4 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "pxor %%mm7, %%mm0 \n\t" + "paddsb %%mm1, %%mm0 \n\t" + "pxor %%mm7, %%mm0 \n\t" + "movntq %%mm0, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len) + : "%"REG_a + ); + if (mmx_len != len) + ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0); +} +#endif /* HAVE_INLINE_ASM */ + +av_cold void ff_noise_init_x86(NoiseContext *n) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) { + n->line_noise = line_noise_mmx; +#if HAVE_6REGS + n->line_noise_avg = line_noise_avg_mmx; +#endif + } + if (INLINE_MMXEXT(cpu_flags)) { + n->line_noise = line_noise_mmxext; + } +#endif +}