avfilter/vf_idet: MMX/MMXEXT/SSE2 implementation of idet's filter_line()

integration by Neil Birkbeck, with help from Vitor Sessak.
core SSE2 loop by Skal (pascal.massimino@gmail.com)

Reviewed-by: Clément Bœsch <u@pkh.me>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
skal 2014-09-03 11:02:32 +02:00 committed by Michael Niedermayer
parent 53b0892005
commit 406a9ccffe
6 changed files with 251 additions and 32 deletions

View File

@ -353,6 +353,7 @@ Filters:
vf_extractplanes.c Paul B Mahol
vf_histogram.c Paul B Mahol
vf_hqx.c Clément Bœsch
vf_idet.c Pascal Massimino
vf_il.c Paul B Mahol
vf_lenscorrection.c Daniel Oberhoff
vf_mergeplanes.c Paul B Mahol

View File

@ -23,37 +23,8 @@
#include "libavutil/cpu.h"
#include "libavutil/common.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "internal.h"
#define HIST_SIZE 4
typedef enum {
TFF,
BFF,
PROGRSSIVE,
UNDETERMINED,
} Type;
/* Private state of one idet filter instance. */
typedef struct {
const AVClass *class;
/* two float thresholds; presumably user options reached through OFFSET() -- TODO confirm */
float interlace_threshold;
float progressive_threshold;
/* set to UNDETERMINED by init() */
Type last_type;
/* two groups of four counters; presumably one counter per Type value -- TODO confirm */
int prestat[4];
int poststat[4];
/* HIST_SIZE bytes, each filled with UNDETERMINED by init() */
uint8_t history[HIST_SIZE];
/* frame pointers; presumably a sliding window of three consecutive frames -- TODO confirm */
AVFrame *cur;
AVFrame *next;
AVFrame *prev;
/* line kernel: takes three byte rows and a width, returns an int score;
 * init() installs the C version */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
/* pixel format descriptor; presumably that of the input -- TODO confirm */
const AVPixFmtDescriptor *csp;
} IDETContext;
#include "vf_idet.h"
#define OFFSET(x) offsetof(IDETContext, x)
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
@ -77,7 +48,7 @@ static const char *type2str(Type type)
return NULL;
}
static int filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
{
int x;
int ret=0;
@ -271,7 +242,10 @@ static av_cold int init(AVFilterContext *ctx)
idet->last_type = UNDETERMINED;
memset(idet->history, UNDETERMINED, HIST_SIZE);
idet->filter_line = filter_line_c;
idet->filter_line = ff_idet_filter_line_c;
if (ARCH_X86)
ff_idet_init_x86(idet);
return 0;
}

58
libavfilter/vf_idet.h Normal file
View File

@ -0,0 +1,58 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_IDET_H
#define AVFILTER_IDET_H
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#define HIST_SIZE 4
typedef enum {
TFF,
BFF,
PROGRSSIVE,
UNDETERMINED,
} Type;
/* Private state of one idet filter instance. It lives in this header so that
 * ff_idet_init_x86() can replace the filter_line pointer. */
typedef struct {
const AVClass *class;
/* two float thresholds; presumably user-settable options -- TODO confirm
 * against the option table in vf_idet.c */
float interlace_threshold;
float progressive_threshold;
/* set to UNDETERMINED by init() */
Type last_type;
/* two groups of four counters; presumably one counter per Type value
 * -- TODO confirm */
int prestat[4];
int poststat[4];
/* HIST_SIZE bytes, each filled with UNDETERMINED by init() */
uint8_t history[HIST_SIZE];
/* frame pointers; presumably a sliding window of three consecutive frames
 * -- TODO confirm */
AVFrame *cur;
AVFrame *next;
AVFrame *prev;
/* Line kernel: takes three byte rows and a width w, returns an int score.
 * init() installs ff_idet_filter_line_c; ff_idet_init_x86() may overwrite it
 * with a SIMD-backed wrapper. */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
/* pixel format descriptor; presumably that of the input -- TODO confirm */
const AVPixFmtDescriptor *csp;
} IDETContext;
/* Overwrites idet->filter_line with an x86 SIMD-backed wrapper when the CPU
 * flags reported at run time allow it; otherwise leaves the pointer untouched.
 * Called from init() when ARCH_X86 is set. */
void ff_idet_init_x86(IDETContext *idet);
/* Plain C line kernel. It is the default filter_line, and the x86 wrappers
 * also call it for the left-over pixels at the end of a row that do not fill
 * a whole SIMD span. */
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w);
#endif

View File

@ -1,5 +1,6 @@
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
@ -7,6 +8,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o

114
libavfilter/x86/vf_idet.asm Normal file
View File

@ -0,0 +1,114 @@
; *****************************************************************************
; * x86-optimized functions for idet filter
; *
; * This file is part of FFmpeg.
; *
; * FFmpeg is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 2 of the License, or
; * (at your option) any later version.
; *
; * FFmpeg is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU General Public License for more details.
; *
; * You should have received a copy of the GNU General Public License along
; * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
; ******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION_TEXT
%if ARCH_X86_32
; MMX / MMXEXT implementation, built for 32-bit x86 only.
; Handles 8 bytes per iteration by widening them to 16-bit words.
;
; C prototype (see FUNC_MAIN_DECL in vf_idet_init.c):
;   int ff_idet_filter_line_<cpu>(const uint8_t *a, const uint8_t *b,
;                                 const uint8_t *c, int w);
;
; Returns in eax the sum of |a[x] + c[x] - 2*b[x]| over the processed bytes.
; The loop body always runs at least once and always consumes whole 8-byte
; groups (it repeats while width > index), so the caller must pass a positive
; multiple of 8; the C wrapper guarantees this.
;
; NOTE(review): no emms is issued before RET -- confirm that callers clear the
; MMX state before any x87 floating-point code runs.
%macro IDET_FILTER_LINE 1
INIT_MMX %1
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
    xor       indexq, indexq
%define   m_zero m2
%define   m_sum  m5
    pxor      m_sum, m_sum          ; two 32-bit partial sums
    pxor      m_zero, m_zero        ; constant zero used for unpacking

.loop:
    ; a[] widened to words: high half in m1, low half in m0
    movu      m0, [aq + indexq*1]
    punpckhbw m1, m0, m_zero
    punpcklbw m0, m_zero

    ; c[] widened to words: high half in m4, low half in m3
    movu      m3, [cq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; a + c
    paddsw    m1, m4
    paddsw    m0, m3

    ; b[] widened to words: high half in m4, low half in m3
    movu      m3, [bq + indexq*1]
    punpckhbw m4, m3, m_zero
    punpcklbw m3, m_zero

    ; (a + c) - 2*b
    paddw     m4, m4
    paddw     m3, m3
    psubsw    m1, m4
    psubsw    m0, m3

    ; |a + c - 2*b| for both halves (m4 and m3 are scratch)
    ABS2      m1, m0, m4, m3

    ; fold the four word results into two dwords and accumulate them
    paddw     m0, m1
    punpckhwd m1, m0, m_zero
    punpcklwd m0, m_zero

    paddd     m0, m1
    paddd     m_sum, m0

    add       indexq, 0x8
    CMP       widthd, indexd
    jg        .loop

    ; Horizontal add of the two dword partial sums.
    ; paddd is used rather than paddq: the 64-bit add on MMX registers is an
    ; SSE2 instruction, so it must not appear in the mmx/mmxext variants.
    ; Only the low dword is returned, so the result is identical.
    mova      m0, m_sum
    psrlq     m_sum, 0x20
    paddd     m0, m_sum
    movd      eax, m0
    RET
%endmacro

IDET_FILTER_LINE mmxext
IDET_FILTER_LINE mmx
%endif
; SSE2 8-bit implementation that does 16-bytes at a time:
;
; C prototype (see FUNC_MAIN_DECL in vf_idet_init.c):
;   int ff_idet_filter_line_sse2(const uint8_t *a, const uint8_t *b,
;                                const uint8_t *c, int w);
;
; Returns in eax the low 32 bits of the sum accumulated over the processed
; bytes. All loads use movu, so a/b/c need no particular alignment.
; The loop body always runs at least once and always consumes whole 16-byte
; vectors (it repeats while index < width), so the caller must pass a positive
; multiple of 16; the C wrapper guarantees this.
; NOTE(review): the sixth GPR "total" is requested but never referenced below.
INIT_XMM sse2
cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
xor indexq, indexq
; m0 and m1 are two independent 64-bit-lane accumulators
pxor m0, m0
pxor m1, m1
.sse2_loop:
movu m2, [bq + indexq*1] ; B
movu m3, [aq + indexq*1] ; A
mova m6, m2
mova m4, m3
; unsigned saturating differences: each one is zero where the subtrahend is larger
psubusb m5, m2, m3 ; ba
movu m3, [cq + indexq*1] ; C
add indexq, 0x10
psubusb m4, m2 ; ab
; the flags set here are consumed by the jl at the bottom of the loop; only
; SIMD instructions, which leave EFLAGS alone, sit in between
CMP indexd, widthd
psubusb m6, m3 ; bc
psubusb m3, m2 ; cb
; psadbw sums the byte-wise absolute differences into each 64-bit lane
psadbw m4, m6 ; |ab - bc|
paddq m0, m4
psadbw m5, m3 ; |ba - cb|
paddq m1, m5
jl .sse2_loop
; merge the two accumulators, then fold the high qword onto the low one
paddq m0, m1
movhlps m1, m0
paddq m0, m1
movd eax, m0
RET

View File

@ -0,0 +1,70 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/vf_idet.h"
/*
 * FUNC_MAIN_DECL(KIND, SPAN) emits two things:
 *   - the prototype of the external kernel ff_idet_filter_line_<KIND>();
 *   - a static wrapper idet_filter_line_<KIND>() suitable for
 *     IDETContext.filter_line.
 *
 * The wrapper hands the largest prefix of the row whose length is a multiple
 * of SPAN to the external kernel and the remaining (w % SPAN) pixels to
 * ff_idet_filter_line_c(), then returns the sum of both results.
 * SPAN is used as a bit mask and therefore has to be a power of two.
 */
#define FUNC_MAIN_DECL(KIND, SPAN)                                          \
int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,          \
                               const uint8_t *c, int w);                    \
static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b,      \
                                   const uint8_t *c, int w) {               \
    const int tail = w & (SPAN - 1);                                        \
    const int body = w - tail;                                              \
    int total = 0;                                                          \
    if (body > 0)                                                           \
        total = ff_idet_filter_line_##KIND(a, b, c, body);                  \
    if (tail > 0)                                                           \
        total += ff_idet_filter_line_c(a + body, b + body, c + body, tail); \
    return total;                                                           \
}

#if HAVE_YASM

/* 16 bytes per iteration */
FUNC_MAIN_DECL(sse2, 16)

#if ARCH_X86_32
/* 8 bytes per iteration; these kernels are only assembled for 32-bit x86 */
FUNC_MAIN_DECL(mmx, 8)
FUNC_MAIN_DECL(mmxext, 8)
#endif /* ARCH_X86_32 */

#endif /* HAVE_YASM */
/**
 * Install the best x86 line kernel the running CPU supports.
 *
 * Preference order is SSE2, then MMXEXT, then MMX; the latter two are only
 * considered on 32-bit x86. When no external assembly is available or no
 * matching CPU flag is set, idet->filter_line is left as the caller set it.
 *
 * @param idet filter context whose filter_line pointer may be replaced
 */
av_cold void ff_idet_init_x86(IDETContext *idet)
{
#if HAVE_YASM
    const int flags = av_get_cpu_flags();

    /* SSE2 takes precedence over every MMX flavour. */
    if (EXTERNAL_SSE2(flags)) {
        idet->filter_line = idet_filter_line_sse2;
        return;
    }

#if ARCH_X86_32
    if (EXTERNAL_MMXEXT(flags))
        idet->filter_line = idet_filter_line_mmxext;
    else if (EXTERNAL_MMX(flags))
        idet->filter_line = idet_filter_line_mmx;
#endif /* ARCH_X86_32 */
#endif /* HAVE_YASM */
}