mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-16 04:15:05 +00:00
huffyuvencdsp: Convert ff_diff_bytes_mmx to yasm
Heavily based upon ff_add_bytes by Christophe Gisquet. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Timothy Gu <timothygu99@gmail.com>
This commit is contained in:
parent
1ec8c1554e
commit
6b41b44149
@ -115,6 +115,7 @@ YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \
|
|||||||
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
|
YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
|
||||||
x86/hpeldsp.o
|
x86/hpeldsp.o
|
||||||
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o
|
||||||
|
YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o
|
||||||
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
|
YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o
|
||||||
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o
|
||||||
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o
|
||||||
|
73
libavcodec/x86/huffyuvencdsp.asm
Normal file
73
libavcodec/x86/huffyuvencdsp.asm
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
;************************************************************************
|
||||||
|
;* SIMD-optimized HuffYUV encoding functions
|
||||||
|
;* Copyright (c) 2000, 2001 Fabrice Bellard
|
||||||
|
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
;*
|
||||||
|
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
|
||||||
|
;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
|
||||||
|
;*
|
||||||
|
;* This file is part of FFmpeg.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
;* modify it under the terms of the GNU Lesser General Public
|
||||||
|
;* License as published by the Free Software Foundation; either
|
||||||
|
;* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
;*
|
||||||
|
;* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;* Lesser General Public License for more details.
|
||||||
|
;*
|
||||||
|
;* You should have received a copy of the GNU Lesser General Public
|
||||||
|
;* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;******************************************************************************
|
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
section .text
|
||||||
|
|
||||||
|
INIT_MMX mmx
|
||||||
|
; void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||||
|
; intptr_t w);
;
; Byte-wise wrapping subtraction: dst[k] = src1[k] - src2[k] for k in [0, w).
; The bulk is processed 2 * mmsize bytes per iteration with psubb (.loop);
; the remaining w % (2 * mmsize) bytes are processed one at a time (.loop2).
; NOTE(review): w is assumed to be non-negative; nothing here checks it.
|
||||||
|
%if ARCH_X86_32
|
||||||
|
; 32-bit: only dst, src1 and src2 are loaded by cglobal; w is fetched by hand
; a few lines below.
cglobal diff_bytes, 3,5,2, dst, src1, src2
|
||||||
|
; w is kept in r4 so that r3 can serve as the temporary.
; NOTE(review): presumably because t0b (used in .loop2) needs a register with
; a byte form on x86-32 -- confirm against x86inc.asm.
%define wq r4q
|
||||||
|
DECLARE_REG_TMP 3
|
||||||
|
; Load the 4th argument (w) from its r3mp location.
mov wq, r3mp
|
||||||
|
%else
|
||||||
|
; 64-bit: all four arguments are loaded by cglobal and r4 is the temporary.
cglobal diff_bytes, 4,5,2, dst, src1, src2, w
|
||||||
|
DECLARE_REG_TMP 4
|
||||||
|
%endif ; ARCH_X86_32
|
||||||
|
; i is the vector-loop byte index. It aliases t0, which is reused as the
; scalar temporary in .loop2 once i is no longer needed.
%define i t0q
|
||||||
|
; i = w rounded down to a multiple of 2 * mmsize (bytes for the vector loop).
mov i, wq
|
||||||
|
and i, -2 * mmsize
|
||||||
|
; Nothing for the vector loop to do: go straight to the byte tail.
jz .setup_loop2
|
||||||
|
; Advance all three pointers past the vector-processed region, then index it
; with a negative offset that counts up towards zero.
add dstq, i
|
||||||
|
add src1q, i
|
||||||
|
add src2q, i
|
||||||
|
neg i
|
||||||
|
.loop:
|
||||||
|
; Two registers' worth (2 * mmsize bytes) of src1 - src2 per iteration.
mova m0, [src1q + i]
|
||||||
|
mova m1, [src1q + i + mmsize]
|
||||||
|
psubb m0, [src2q + i]
|
||||||
|
psubb m1, [src2q + i + mmsize]
|
||||||
|
mova [dstq + i], m0
|
||||||
|
mova [mmsize + dstq + i], m1
|
||||||
|
add i, 2 * mmsize
|
||||||
|
; Continue while the index is still negative.
jl .loop
|
||||||
|
.setup_loop2:
|
||||||
|
; wq = number of tail bytes left over after the vector loop.
and wq, 2 * mmsize - 1
|
||||||
|
jz .end
|
||||||
|
; Same negative-index scheme as above, now over the tail bytes.
add dstq, wq
|
||||||
|
add src1q, wq
|
||||||
|
add src2q, wq
|
||||||
|
neg wq
|
||||||
|
.loop2:
|
||||||
|
; One byte per iteration; t0b overwrites the low byte of i, which is dead here.
mov t0b, [src1q + wq]
|
||||||
|
sub t0b, [src2q + wq]
|
||||||
|
mov [dstq + wq], t0b
|
||||||
|
inc wq
|
||||||
|
jl .loop2
|
||||||
|
.end:
|
||||||
|
REP_RET
|
@ -29,34 +29,11 @@
|
|||||||
#include "libavcodec/huffyuvencdsp.h"
|
#include "libavcodec/huffyuvencdsp.h"
|
||||||
#include "libavcodec/mathops.h"
|
#include "libavcodec/mathops.h"
|
||||||
|
|
||||||
|
void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
||||||
|
intptr_t w);
|
||||||
|
|
||||||
#if HAVE_INLINE_ASM
|
#if HAVE_INLINE_ASM
|
||||||
|
|
||||||
static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
|
|
||||||
intptr_t w)
|
|
||||||
{
|
|
||||||
x86_reg i = 0;
|
|
||||||
|
|
||||||
if (w >= 16)
|
|
||||||
__asm__ volatile (
|
|
||||||
"1: \n\t"
|
|
||||||
"movq (%2, %0), %%mm0 \n\t"
|
|
||||||
"movq (%1, %0), %%mm1 \n\t"
|
|
||||||
"psubb %%mm0, %%mm1 \n\t"
|
|
||||||
"movq %%mm1, (%3, %0) \n\t"
|
|
||||||
"movq 8(%2, %0), %%mm0 \n\t"
|
|
||||||
"movq 8(%1, %0), %%mm1 \n\t"
|
|
||||||
"psubb %%mm0, %%mm1 \n\t"
|
|
||||||
"movq %%mm1, 8(%3, %0) \n\t"
|
|
||||||
"add $16, %0 \n\t"
|
|
||||||
"cmp %4, %0 \n\t"
|
|
||||||
" jb 1b \n\t"
|
|
||||||
: "+r" (i)
|
|
||||||
: "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w - 15));
|
|
||||||
|
|
||||||
for (; i < w; i++)
|
|
||||||
dst[i + 0] = src1[i + 0] - src2[i + 0];
|
|
||||||
}
|
|
||||||
|
|
||||||
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
||||||
const uint8_t *src2, intptr_t w,
|
const uint8_t *src2, intptr_t w,
|
||||||
int *left, int *left_top)
|
int *left, int *left_top)
|
||||||
@ -101,13 +78,13 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
|
|||||||
|
|
||||||
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
|
av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
|
||||||
{
|
{
|
||||||
#if HAVE_INLINE_ASM
|
av_unused int cpu_flags = av_get_cpu_flags();
|
||||||
int cpu_flags = av_get_cpu_flags();
|
|
||||||
|
|
||||||
if (INLINE_MMX(cpu_flags)) {
|
if (EXTERNAL_MMX(cpu_flags)) {
|
||||||
c->diff_bytes = diff_bytes_mmx;
|
c->diff_bytes = ff_diff_bytes_mmx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM
|
||||||
if (INLINE_MMXEXT(cpu_flags)) {
|
if (INLINE_MMXEXT(cpu_flags)) {
|
||||||
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
|
c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user