mirror of https://git.ffmpeg.org/ffmpeg.git
avfilter/vf_idet: MMX/MMXEXT/SSE2 implementation of idet's filter_line()
integration by Neil Birkbeck, with help from Vitor Sessak. core SSE2 loop by Skal (pascal.massimino@gmail.com) Reviewed-by: Clément Bœsch <u@pkh.me> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
53b0892005
commit
406a9ccffe
|
@ -353,6 +353,7 @@ Filters:
|
|||
vf_extractplanes.c Paul B Mahol
|
||||
vf_histogram.c Paul B Mahol
|
||||
vf_hqx.c Clément Bœsch
|
||||
vf_idet.c Pascal Massimino
|
||||
vf_il.c Paul B Mahol
|
||||
vf_lenscorrection.c Daniel Oberhoff
|
||||
vf_mergeplanes.c Paul B Mahol
|
||||
|
|
|
@ -23,37 +23,8 @@
|
|||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "avfilter.h"
|
||||
#include "internal.h"
|
||||
|
||||
#define HIST_SIZE 4
|
||||
|
||||
typedef enum {
|
||||
TFF,
|
||||
BFF,
|
||||
PROGRSSIVE,
|
||||
UNDETERMINED,
|
||||
} Type;
|
||||
|
||||
typedef struct {
|
||||
const AVClass *class;
|
||||
float interlace_threshold;
|
||||
float progressive_threshold;
|
||||
|
||||
Type last_type;
|
||||
int prestat[4];
|
||||
int poststat[4];
|
||||
|
||||
uint8_t history[HIST_SIZE];
|
||||
|
||||
AVFrame *cur;
|
||||
AVFrame *next;
|
||||
AVFrame *prev;
|
||||
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
|
||||
|
||||
const AVPixFmtDescriptor *csp;
|
||||
} IDETContext;
|
||||
#include "vf_idet.h"
|
||||
|
||||
#define OFFSET(x) offsetof(IDETContext, x)
|
||||
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
||||
|
@ -77,7 +48,7 @@ static const char *type2str(Type type)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
|
||||
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w)
|
||||
{
|
||||
int x;
|
||||
int ret=0;
|
||||
|
@ -271,7 +242,10 @@ static av_cold int init(AVFilterContext *ctx)
|
|||
idet->last_type = UNDETERMINED;
|
||||
memset(idet->history, UNDETERMINED, HIST_SIZE);
|
||||
|
||||
idet->filter_line = filter_line_c;
|
||||
idet->filter_line = ff_idet_filter_line_c;
|
||||
|
||||
if (ARCH_X86)
|
||||
ff_idet_init_x86(idet);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVFILTER_IDET_H
|
||||
#define AVFILTER_IDET_H
|
||||
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "avfilter.h"
|
||||
|
||||
#define HIST_SIZE 4
|
||||
|
||||
/*
 * Classification value stored in IDETContext.last_type and in the
 * per-frame history[] array.
 * NOTE(review): TFF/BFF presumably mean top-/bottom-field-first -- confirm.
 * "PROGRSSIVE" is misspelled, but the spelling is part of the identifier;
 * renaming it requires updating every user of the enum.
 */
typedef enum {
|
||||
TFF,
|
||||
BFF,
|
||||
PROGRSSIVE,
|
||||
UNDETERMINED,
|
||||
} Type;
|
||||
|
||||
/*
 * Private state of the idet filter. It is declared in this header so that
 * the x86 init code can install an optimized filter_line callback.
 */
typedef struct {
|
||||
const AVClass *class;
|
||||
/* NOTE(review): the two thresholds are presumably user options -- confirm. */
float interlace_threshold;
|
||||
float progressive_threshold;
|
||||
|
||||
/* Set to UNDETERMINED by the filter's init(). */
Type last_type;
|
||||
/* NOTE(review): 4 entries, presumably one counter per Type value -- confirm. */
int prestat[4];
|
||||
int poststat[4];
|
||||
|
||||
/* Filled with UNDETERMINED by the filter's init(). */
uint8_t history[HIST_SIZE];
|
||||
|
||||
AVFrame *cur;
|
||||
AVFrame *next;
|
||||
AVFrame *prev;
|
||||
/*
 * Per-line metric over w pixels. Defaults to ff_idet_filter_line_c and
 * may be replaced by ff_idet_init_x86().
 */
int (*filter_line)(const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w);
|
||||
|
||||
const AVPixFmtDescriptor *csp;
|
||||
} IDETContext;
|
||||
|
||||
void ff_idet_init_x86(IDETContext *idet);
|
||||
|
||||
/* C implementation: the default filter_line, also used by the x86 wrappers for left-over pixels */
|
||||
int ff_idet_filter_line_c(const uint8_t *a, const uint8_t *b, const uint8_t *c, int w);
|
||||
|
||||
#endif
|
|
@ -1,5 +1,6 @@
|
|||
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
|
||||
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
|
||||
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
|
||||
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
|
||||
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
|
||||
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o
|
||||
|
@ -7,6 +8,7 @@ OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif_init.o
|
|||
|
||||
YASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
|
||||
YASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
|
||||
YASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
|
||||
YASM-OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup.o
|
||||
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o
|
||||
YASM-OBJS-$(CONFIG_YADIF_FILTER) += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
; *****************************************************************************
|
||||
; * x86-optimized functions for idet filter
|
||||
; *
|
||||
; * This file is part of FFmpeg.
|
||||
; *
|
||||
; * FFmpeg is free software; you can redistribute it and/or modify
|
||||
; * it under the terms of the GNU General Public License as published by
|
||||
; * the Free Software Foundation; either version 2 of the License, or
|
||||
; * (at your option) any later version.
|
||||
; *
|
||||
; * FFmpeg is distributed in the hope that it will be useful,
|
||||
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
; * GNU General Public License for more details.
|
||||
; *
|
||||
; * You should have received a copy of the GNU General Public License along
|
||||
; * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
|
||||
; * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
; ******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
SECTION_TEXT
|
||||
|
||||
%if ARCH_X86_32
|
||||
|
||||
; Implementation that does 8-bytes at a time using single-word operations.
; Each iteration loads 8 bytes from a, b and c, widens them to 16-bit words,
; computes a + c - 2*b per pixel, passes the result through ABS2 and adds it
; to m_sum. The loop body runs before the width test, so the caller must
; pass width > 0.
|
||||
%macro IDET_FILTER_LINE 1
|
||||
INIT_MMX %1
|
||||
cglobal idet_filter_line, 4, 5, 0, a, b, c, width, index
|
||||
xor indexq, indexq
|
||||
%define m_zero m2
|
||||
%define m_sum m5
|
||||
pxor m_sum, m_sum
|
||||
pxor m_zero, m_zero
|
||||
|
||||
.loop:
|
||||
; widen 8 bytes of a: high half -> m1, low half -> m0
movu m0, [aq + indexq*1]
|
||||
punpckhbw m1, m0, m_zero
|
||||
punpcklbw m0, m_zero
|
||||
|
||||
; widen 8 bytes of c: high half -> m4, low half -> m3
movu m3, [cq + indexq*1]
|
||||
punpckhbw m4, m3, m_zero
|
||||
punpcklbw m3, m_zero
|
||||
|
||||
; m1:m0 = a + c
paddsw m1, m4
|
||||
paddsw m0, m3
|
||||
|
||||
; widen 8 bytes of b: high half -> m4, low half -> m3
movu m3, [bq + indexq*1]
|
||||
punpckhbw m4, m3, m_zero
|
||||
punpcklbw m3, m_zero
|
||||
|
||||
; m4:m3 = 2*b, then m1:m0 = a + c - 2*b
paddw m4, m4
|
||||
paddw m3, m3
|
||||
psubsw m1, m4
|
||||
psubsw m0, m3
|
||||
|
||||
; ABS2 comes from x86util.asm (not shown here); presumably it leaves the
; absolute values in m1 and m0 and uses m4/m3 as scratch -- confirm.
ABS2 m1, m0, m4, m3
|
||||
|
||||
; combine both halves, widen words to dwords and accumulate into m_sum
paddw m0, m1
|
||||
punpckhwd m1, m0, m_zero
|
||||
punpcklwd m0, m_zero
|
||||
|
||||
paddd m0, m1
|
||||
paddd m_sum, m0
|
||||
|
||||
; advance 8 pixels; loop while width > index
add indexq, 0x8
|
||||
CMP widthd, indexd
|
||||
jg .loop
|
||||
|
||||
; fold the upper dword of m_sum onto the lower one and return it in eax
mova m0, m_sum
|
||||
psrlq m_sum, 0x20
|
||||
; NOTE(review): paddq on MMX registers is an SSE2-level instruction; confirm
; it is acceptable in the mmx/mmxext variants (paddd would give the same low
; dword here).
paddq m0, m_sum
|
||||
movd eax, m0
|
||||
RET
|
||||
%endmacro
|
||||
|
||||
IDET_FILTER_LINE mmxext
|
||||
IDET_FILTER_LINE mmx
|
||||
%endif
|
||||
|
||||
; SSE2 8-bit implementation that does 16-bytes at a time:
; It uses saturating byte differences and psadbw, so no widening to words is
; needed. m0 and m1 are two independent accumulators that are added together
; after the loop. The loop body runs before the width test, so the caller
; must pass width > 0.
; NOTE(review): six GPRs are requested and "total" is named, but "total" is
; not referenced in the body shown here.
|
||||
INIT_XMM sse2
|
||||
cglobal idet_filter_line, 4, 6, 7, a, b, c, width, index, total
|
||||
xor indexq, indexq
|
||||
pxor m0, m0
|
||||
pxor m1, m1
|
||||
|
||||
.sse2_loop:
|
||||
movu m2, [bq + indexq*1] ; B
|
||||
movu m3, [aq + indexq*1] ; A
|
||||
mova m6, m2
|
||||
mova m4, m3
|
||||
psubusb m5, m2, m3 ; ba
|
||||
|
||||
movu m3, [cq + indexq*1] ; C
|
||||
add indexq, 0x10
|
||||
psubusb m4, m2 ; ab
|
||||
; the comparison is issued early; its flags are consumed by the jl at the
; bottom of the loop
CMP indexd, widthd
|
||||
|
||||
psubusb m6, m3 ; bc
|
||||
psubusb m3, m2 ; cb
|
||||
|
||||
psadbw m4, m6 ; |ab - bc|
|
||||
paddq m0, m4
|
||||
psadbw m5, m3 ; |ba - cb|
|
||||
paddq m1, m5
|
||||
jl .sse2_loop
|
||||
|
||||
; merge both accumulators, fold the high qword onto the low one and return
; the low dword in eax
paddq m0, m1
|
||||
movhlps m1, m0
|
||||
paddq m0, m1
|
||||
movd eax, m0
|
||||
RET
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "libavutil/x86/asm.h"
|
||||
#include "libavutil/x86/cpu.h"
|
||||
#include "libavfilter/vf_idet.h"
|
||||
|
||||
/*
 * Declares the assembly routine ff_idet_filter_line_{mmx,mmxext,sse2}() and
 * defines a static wrapper idet_filter_line_{mmx,mmxext,sse2}() around it.
 *
 * The wrapper splits the width w in two parts:
 *   - the largest multiple of SPAN, which is handled by the assembly routine
 *     (called only when that part is non-zero);
 *   - the remaining w & (SPAN - 1) pixels, which are handled by
 *     ff_idet_filter_line_c().
 * The wrapper returns the sum of both results.
 *
 * SPAN must be a power of two, because the remainder is taken with a mask.
 */
|
||||
#define FUNC_MAIN_DECL(KIND, SPAN) \
|
||||
int ff_idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \
|
||||
const uint8_t *c, int w); \
|
||||
static int idet_filter_line_##KIND(const uint8_t *a, const uint8_t *b, \
|
||||
const uint8_t *c, int w) { \
|
||||
int sum = 0; \
|
||||
const int left_over = w & (SPAN - 1); \
|
||||
w -= left_over; \
|
||||
if (w > 0) \
|
||||
sum += ff_idet_filter_line_##KIND(a, b, c, w); \
|
||||
if (left_over > 0) \
|
||||
sum += ff_idet_filter_line_c(a + w, b + w, c + w, left_over); \
|
||||
return sum; \
|
||||
}
|
||||
|
||||
#if HAVE_YASM
|
||||
|
||||
/* SSE2 wrapper uses a span of 16 pixels; the MMX wrappers use a span of 8. */
FUNC_MAIN_DECL(sse2, 16)
|
||||
#if ARCH_X86_32
|
||||
FUNC_MAIN_DECL(mmx, 8)
|
||||
FUNC_MAIN_DECL(mmxext, 8)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Installs an x86-optimized filter_line callback in idet, based on the CPU
 * flags reported by av_get_cpu_flags().
 *
 * The checks run in order MMX, MMXEXT, SSE2 and each one overwrites the
 * previous assignment, so the last matching variant is the one used.
 * The MMX/MMXEXT variants are only considered when ARCH_X86_32 is set.
 * If HAVE_YASM is not set, the function leaves idet->filter_line unchanged.
 */
av_cold void ff_idet_init_x86(IDETContext *idet)
|
||||
{
|
||||
#if HAVE_YASM
|
||||
const int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
#if ARCH_X86_32
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
idet->filter_line = idet_filter_line_mmx;
|
||||
}
|
||||
if (EXTERNAL_MMXEXT(cpu_flags)) {
|
||||
idet->filter_line = idet_filter_line_mmxext;
|
||||
}
|
||||
#endif // ARCH_X86_32
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
idet->filter_line = idet_filter_line_sse2;
|
||||
}
|
||||
#endif // HAVE_YASM
|
||||
}
|
Loading…
Reference in New Issue