mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-15 11:44:49 +00:00
huffyuvencdsp: Convert ff_diff_bytes_mmx to yasm
Heavily based upon ff_add_bytes by Christophe Gisquet. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
parent
1ec8c1554e
commit
6b41b44149
@ -115,6 +115,7 @@ YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
|
||||
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
|
||||
x86/hpeldsp.o
|
||||
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
||||
YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o
|
||||
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
|
||||
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
||||
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
||||
|
73
libavcodec/x86/huffyuvencdsp.asm
Normal file
73
libavcodec/x86/huffyuvencdsp.asm
Normal file
@ -0,0 +1,73 @@
|
||||
;************************************************************************
|
||||
;* SIMD-optimized HuffYUV encoding functions
|
||||
;* Copyright (c) 2000, 2001 Fabrice Bellard
|
||||
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
||||
;*
|
||||
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
|
||||
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
|
||||
;*
|
||||
;* This file is part of FFmpeg.
|
||||
;*
|
||||
;* FFmpeg is free software; you can redistribute it and/or
|
||||
;* modify it under the terms of the GNU Lesser General Public
|
||||
;* License as published by the Free Software Foundation; either
|
||||
;* version 2.1 of the License, or (at your option) any later version.
|
||||
;*
|
||||
;* FFmpeg is distributed in the hope that it will be useful,
|
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;* Lesser General Public License for more details.
|
||||
;*
|
||||
;* You should have received a copy of the GNU Lesser General Public
|
||||
;* License along with FFmpeg; if not, write to the Free Software
|
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
;******************************************************************************
|
||||
|
||||
%include "libavutil/x86/x86util.asm"
|
||||
|
||||
section .text
|
||||
|
||||
INIT_MMX mmx
|
||||
; void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
; intptr_t w);
;
; Byte-wise subtraction: dst[k] = src1[k] - src2[k] for k in [0, w), wrapping
; modulo 256 (psubb in the vector loop, an 8-bit sub in the tail loop).
; The bulk is processed 2 * mmsize bytes per iteration using two vector
; registers (m0, m1); the remaining w % (2 * mmsize) bytes are handled one
; byte at a time.
; No emms is issued in this function.
; NOTE(review): callers are presumably expected to reset MMX state - confirm.
|
||||
%if ARCH_X86_32
|
||||
; x86-32 path: only the three pointer arguments are declared as register
; arguments to cglobal; w is fetched by hand below.
cglobal diff_bytes, 3,5,2, dst, src1, src2
|
||||
; w is kept in r4 and the temporary in r3.
; NOTE(review): presumably chosen so that the temporary has a byte-sized
; alias (t0b is used in the tail loop) - confirm against x86inc.asm.
%define wq r4q
|
||||
DECLARE_REG_TMP 3
|
||||
mov wq, r3mp ; load the 4th argument (w) from its argument slot
|
||||
%else
|
||||
; x86-64 path: all four arguments are named register arguments; the fifth
; register (r4) serves as the temporary.
cglobal diff_bytes, 4,5,2, dst, src1, src2, w
|
||||
DECLARE_REG_TMP 4
|
||||
%endif ; ARCH_X86_32
|
||||
; i is the vector-loop index; it aliases the temporary register t0.
%define i t0q
|
||||
mov i, wq
|
||||
and i, -2 * mmsize ; i = w rounded down to a multiple of 2 * mmsize
|
||||
jz .setup_loop2 ; no full 2 * mmsize chunk: go straight to the byte tail
|
||||
; Advance all three pointers past the vectorised region and index it with a
; negative offset that counts up towards zero, so the loop needs no compare.
add dstq, i
|
||||
add src1q, i
|
||||
add src2q, i
|
||||
neg i
|
||||
.loop: ; invariant: i < 0, a multiple of 2 * mmsize
|
||||
mova m0, [src1q + i]
|
||||
mova m1, [src1q + i + mmsize]
|
||||
psubb m0, [src2q + i] ; m0 = src1 - src2, per byte
|
||||
psubb m1, [src2q + i + mmsize]
|
||||
mova [dstq + i], m0
|
||||
mova [mmsize + dstq + i], m1
|
||||
add i, 2 * mmsize
|
||||
jl .loop ; continue while the index is still negative
|
||||
.setup_loop2: ; pointers now sit at the first byte not yet processed
|
||||
and wq, 2 * mmsize - 1 ; wq = number of leftover bytes
|
||||
jz .end ; w was a multiple of 2 * mmsize: nothing left
|
||||
; Same negative-index scheme as above, now with a step of one byte.
add dstq, wq
|
||||
add src1q, wq
|
||||
add src2q, wq
|
||||
neg wq
|
||||
.loop2: ; invariant: wq < 0
|
||||
mov t0b, [src1q + wq] ; t0 is free again: i is not used past the vector loop
|
||||
sub t0b, [src2q + wq]
|
||||
mov [dstq + wq], t0b
|
||||
inc wq
|
||||
jl .loop2 ; continue until the negative index reaches zero
|
||||
.end:
|
||||
REP_RET
|
@ -29,34 +29,11 @@
|
||||
#include "libavcodec/huffyuvencdsp.h"
|
||||
#include "libavcodec/mathops.h"
|
||||
|
||||
void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
intptr_t w);
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
|
||||
static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||
intptr_t w)
|
||||
{
|
||||
x86_reg i = 0;
|
||||
|
||||
if (w >= 16)
|
||||
__asm__ volatile (
|
||||
"1: \n\t"
|
||||
"movq (%2, %0), %%mm0 \n\t"
|
||||
"movq (%1, %0), %%mm1 \n\t"
|
||||
"psubb %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm1, (%3, %0) \n\t"
|
||||
"movq 8(%2, %0), %%mm0 \n\t"
|
||||
"movq 8(%1, %0), %%mm1 \n\t"
|
||||
"psubb %%mm0, %%mm1 \n\t"
|
||||
"movq %%mm1, 8(%3, %0) \n\t"
|
||||
"add $16, %0 \n\t"
|
||||
"cmp %4, %0 \n\t"
|
||||
" jb 1b \n\t"
|
||||
: "+r" (i)
|
||||
: "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w - 15));
|
||||
|
||||
for (; i < w; i++)
|
||||
dst[i + 0] = src1[i + 0] - src2[i + 0];
|
||||
}
|
||||
|
||||
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
||||
const uint8_t *src2, intptr_t w,
|
||||
int *left, int *left_top)
|
||||
@ -101,13 +78,13 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
||||
|
||||
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
|
||||
{
|
||||
#if HAVE_INLINE_ASM
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
av_unused int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (INLINE_MMX(cpu_flags)) {
|
||||
c->diff_bytes = diff_bytes_mmx;
|
||||
if (EXTERNAL_MMX(cpu_flags)) {
|
||||
c->diff_bytes = ff_diff_bytes_mmx;
|
||||
}
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
if (INLINE_MMXEXT(cpu_flags)) {
|
||||
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user