huffyuvencdsp: Convert ff_diff_bytes_mmx to yasm

Heavily based upon ff_add_bytes by Christophe Gisquet.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
Timothy Gu 2015-10-19 02:25:11 +01:00
parent 1ec8c1554e
commit 6b41b44149
3 changed files with 81 additions and 30 deletions

View File

@ -115,6 +115,7 @@ YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
x86/hpeldsp.o
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o

View File

@ -0,0 +1,73 @@
;************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
section .text
INIT_MMX mmx
; void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
; intptr_t w);
%if ARCH_X86_32
cglobal diff_bytes, 3,5,2, dst, src1, src2
%define wq r4q
DECLARE_REG_TMP 3
mov wq, r3mp
%else
cglobal diff_bytes, 4,5,2, dst, src1, src2, w
DECLARE_REG_TMP 4
%endif ; ARCH_X86_32
%define i t0q
mov i, wq
and i, -2 * mmsize
jz .setup_loop2
add dstq, i
add src1q, i
add src2q, i
neg i
.loop:
mova m0, [src1q + i]
mova m1, [src1q + i + mmsize]
psubb m0, [src2q + i]
psubb m1, [src2q + i + mmsize]
mova [dstq + i], m0
mova [mmsize + dstq + i], m1
add i, 2 * mmsize
jl .loop
.setup_loop2:
and wq, 2 * mmsize - 1
jz .end
add dstq, wq
add src1q, wq
add src2q, wq
neg wq
.loop2:
mov t0b, [src1q + wq]
sub t0b, [src2q + wq]
mov [dstq + wq], t0b
inc wq
jl .loop2
.end:
REP_RET

View File

@ -29,34 +29,11 @@
#include "libavcodec/huffyuvencdsp.h"
#include "libavcodec/mathops.h"
void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w);
#if HAVE_INLINE_ASM
static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
intptr_t w)
{
x86_reg i = 0;
if (w >= 16)
__asm__ volatile (
"1: \n\t"
"movq (%2, %0), %%mm0 \n\t"
"movq (%1, %0), %%mm1 \n\t"
"psubb %%mm0, %%mm1 \n\t"
"movq %%mm1, (%3, %0) \n\t"
"movq 8(%2, %0), %%mm0 \n\t"
"movq 8(%1, %0), %%mm1 \n\t"
"psubb %%mm0, %%mm1 \n\t"
"movq %%mm1, 8(%3, %0) \n\t"
"add $16, %0 \n\t"
"cmp %4, %0 \n\t"
" jb 1b \n\t"
: "+r" (i)
: "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w - 15));
for (; i < w; i++)
dst[i + 0] = src1[i + 0] - src2[i + 0];
}
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
const uint8_t *src2, intptr_t w,
int *left, int *left_top)
@ -101,13 +78,13 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
{
#if HAVE_INLINE_ASM
int cpu_flags = av_get_cpu_flags();
av_unused int cpu_flags = av_get_cpu_flags();
if (INLINE_MMX(cpu_flags)) {
c->diff_bytes = diff_bytes_mmx;
if (EXTERNAL_MMX(cpu_flags)) {
c->diff_bytes = ff_diff_bytes_mmx;
}
#if HAVE_INLINE_ASM
if (INLINE_MMXEXT(cpu_flags)) {
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
}