mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-26 09:13:26 +00:00
x86/svq1enc: port ssd_int8_vs_int16 to yasm
Also add an SSE2 version

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
19b79c1429
commit
dad31083ae
@ -77,7 +77,7 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
|
|||||||
#define THRESHOLD_MULTIPLIER 0.6
|
#define THRESHOLD_MULTIPLIER 0.6
|
||||||
|
|
||||||
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
|
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
|
||||||
int size)
|
intptr_t size)
|
||||||
{
|
{
|
||||||
int score = 0, i;
|
int score = 0, i;
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ typedef struct SVQ1EncContext {
|
|||||||
uint8_t *scratchbuf;
|
uint8_t *scratchbuf;
|
||||||
|
|
||||||
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
||||||
int size);
|
intptr_t size);
|
||||||
} SVQ1EncContext;
|
} SVQ1EncContext;
|
||||||
|
|
||||||
void ff_svq1enc_init_ppc(SVQ1EncContext *c);
|
void ff_svq1enc_init_ppc(SVQ1EncContext *c);
|
||||||
|
@ -45,7 +45,7 @@ OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
|
|||||||
OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
|
OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
|
||||||
OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
|
OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
|
||||||
x86/rv40dsp_init.o
|
x86/rv40dsp_init.o
|
||||||
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
|
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
|
||||||
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
|
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
|
||||||
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
|
OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp_init.o
|
||||||
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
|
||||||
@ -122,6 +122,7 @@ YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
|
|||||||
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
|
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
|
||||||
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
|
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
|
||||||
x86/rv40dsp.o
|
x86/rv40dsp.o
|
||||||
|
YASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
|
||||||
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
|
YASM-OBJS-$(CONFIG_TTA_DECODER) += x86/ttadsp.o
|
||||||
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
|
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
|
||||||
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
|
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
|
||||||
|
61
libavcodec/x86/svq1enc.asm
Normal file
61
libavcodec/x86/svq1enc.asm
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
;******************************************************************************
|
||||||
|
;* SIMD-optimized SVQ1 encoder functions
|
||||||
|
;* Copyright (c) 2007 Loren Merritt
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_TEXT
|
||||||
|
|
||||||
|
;------------------------------------------------------------------------------
; int ff_ssd_int8_vs_int16(const int8_t *pix1, const int16_t *pix2,
;                          intptr_t size)
;
; Returns in eax the sum of (pix2[i] - pix1[i])^2 over the processed elements.
; The macro is expanded once per instruction set (see the INIT_* lines below);
; mmsize selects the 8-byte (MMX) or 16-byte (XMM) code path.
;
; Each iteration first subtracts 8 from size and then uses it as the element
; index, so the buffers are walked from the end towards the start, 8 elements
; at a time.
; NOTE(review): this assumes size is a positive multiple of 8, and that pix2
; satisfies whatever alignment "mova" requires in the XMM build -- confirm
; against the callers.
; NOTE(review): the MMX path touches m3/m4 although cglobal declares only 3
; vector registers; presumably harmless for MMX -- confirm against x86inc.asm.
;------------------------------------------------------------------------------
%macro SSD_INT8_VS_INT16 0
cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
    pxor      m0, m0                          ; m0 = dword accumulators
.loop:
    sub       sizeq, 8                        ; sets the flags tested by jg below
    movq      m1, [pix1q + sizeq]             ; 8 x int8
    mova      m2, [pix2q + sizeq*2]           ; first mmsize bytes of int16 data
%if mmsize == 8
    movq      m3, [pix2q + sizeq*2 + mmsize]  ; remaining 4 x int16
    ; Put each int8 into the high byte of a word, then shift right
    ; arithmetically by 8 to sign-extend it.  The previous contents of m4 only
    ; end up in the low bytes, which the shift discards.
    punpckhbw m4, m1
    punpcklbw m1, m1
    psraw     m4, 8
    psraw     m1, 8
    psubw     m3, m4
    psubw     m2, m1
    pmaddwd   m3, m3                          ; square and add adjacent pairs
    pmaddwd   m2, m2
    paddd     m0, m3
    paddd     m0, m2
%else
    punpcklbw m1, m1                          ; int8 -> high byte of each word
    psraw     m1, 8                           ; sign-extend to int16
    psubw     m2, m1
    pmaddwd   m2, m2                          ; square and add adjacent pairs
    paddd     m0, m2
%endif
    ; None of the SIMD/move instructions above modify EFLAGS, so this still
    ; tests the result of "sub sizeq, 8": loop while the index is > 0.
    jg        .loop
    HADDD     m0, m1                          ; horizontal dword add (x86util.asm), m1 is scratch
    movd      eax, m0
    RET
%endmacro

; Instantiate the function once for MMX and once for SSE2.
INIT_MMX mmx
SSD_INT8_VS_INT16
INIT_XMM sse2
SSD_INT8_VS_INT16
|
@ -1,75 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2007 Loren Merritt
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
#include "libavutil/attributes.h"
|
|
||||||
#include "libavutil/cpu.h"
|
|
||||||
#include "libavutil/x86/asm.h"
|
|
||||||
#include "libavutil/x86/cpu.h"
|
|
||||||
#include "libavcodec/svq1enc.h"
|
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
|
||||||
|
|
||||||
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size)
|
|
||||||
{
|
|
||||||
int sum;
|
|
||||||
x86_reg i = size;
|
|
||||||
|
|
||||||
__asm__ volatile (
|
|
||||||
"pxor %%mm4, %%mm4 \n"
|
|
||||||
"1: \n"
|
|
||||||
"sub $8, %0 \n"
|
|
||||||
"movq (%2, %0), %%mm2 \n"
|
|
||||||
"movq (%3, %0, 2), %%mm0 \n"
|
|
||||||
"movq 8(%3, %0, 2), %%mm1 \n"
|
|
||||||
"punpckhbw %%mm2, %%mm3 \n"
|
|
||||||
"punpcklbw %%mm2, %%mm2 \n"
|
|
||||||
"psraw $8, %%mm3 \n"
|
|
||||||
"psraw $8, %%mm2 \n"
|
|
||||||
"psubw %%mm3, %%mm1 \n"
|
|
||||||
"psubw %%mm2, %%mm0 \n"
|
|
||||||
"pmaddwd %%mm1, %%mm1 \n"
|
|
||||||
"pmaddwd %%mm0, %%mm0 \n"
|
|
||||||
"paddd %%mm1, %%mm4 \n"
|
|
||||||
"paddd %%mm0, %%mm4 \n"
|
|
||||||
"jg 1b \n"
|
|
||||||
"movq %%mm4, %%mm3 \n"
|
|
||||||
"psrlq $32, %%mm3 \n"
|
|
||||||
"paddd %%mm3, %%mm4 \n"
|
|
||||||
"movd %%mm4, %1 \n"
|
|
||||||
: "+r" (i), "=r" (sum)
|
|
||||||
: "r" (pix1), "r" (pix2));
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
|
||||||
|
|
||||||
/**
 * Install the x86 implementation of ssd_int8_vs_int16 into the encoder
 * context when inline assembly is compiled in and the CPU reports MMX.
 * Otherwise the context is left untouched.
 */
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
{
#if HAVE_INLINE_ASM
    const int flags = av_get_cpu_flags();

    if (INLINE_MMX(flags))
        c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
#endif /* HAVE_INLINE_ASM */
}
|
|
42
libavcodec/x86/svq1enc_init.c
Normal file
42
libavcodec/x86/svq1enc_init.c
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2007 Loren Merritt
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavutil/cpu.h"
|
||||||
|
#include "libavutil/x86/cpu.h"
|
||||||
|
#include "libavcodec/svq1enc.h"
|
||||||
|
|
||||||
|
int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
|
||||||
|
intptr_t size);
|
||||||
|
int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2,
|
||||||
|
intptr_t size);
|
||||||
|
|
||||||
|
av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
|
c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx;
|
||||||
|
}
|
||||||
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
|
c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user