diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 0d8cae354a..623183354d 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -24,61 +24,7 @@ SECTION .text
 
-
-%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-    movd    m4, maskd
-    SPLATW  m4, m4
-    add     wd, wd
-    test    wq, 2*mmsize - 1
-    jz %%.tomainloop
-    push    tmpq
-%%.wordloop:
-    sub     wq, 2
-%ifidn %2, add
-    mov     tmpw, [srcq+wq]
-    add     tmpw, [dstq+wq]
-%else
-    mov     tmpw, [src1q+wq]
-    sub     tmpw, [src2q+wq]
-%endif
-    and     tmpw, maskw
-    mov     [dstq+wq], tmpw
-    test    wq, 2*mmsize - 1
-    jnz %%.wordloop
-    pop     tmpq
-%%.tomainloop:
-%ifidn %2, add
-    add     srcq, wq
-%else
-    add     src1q, wq
-    add     src2q, wq
-%endif
-    add     dstq, wq
-    neg     wq
-    jz %%.end
-%%.loop:
-%ifidn %2, add
-    mov%1   m0, [srcq+wq]
-    mov%1   m1, [dstq+wq]
-    mov%1   m2, [srcq+wq+mmsize]
-    mov%1   m3, [dstq+wq+mmsize]
-%else
-    mov%1   m0, [src1q+wq]
-    mov%1   m1, [src2q+wq]
-    mov%1   m2, [src1q+wq+mmsize]
-    mov%1   m3, [src2q+wq+mmsize]
-%endif
-    p%2w    m0, m1
-    p%2w    m2, m3
-    pand    m0, m4
-    pand    m2, m4
-    mov%1   [dstq+wq]       , m0
-    mov%1   [dstq+wq+mmsize], m2
-    add     wq, 2*mmsize
-    jl %%.loop
-%%.end:
-    RET
-%endmacro
+%include "libavcodec/x86/huffyuvdsp_template.asm"
 
 %if ARCH_X86_32
 INIT_MMX mmx
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 26cf6214d8..a522074565 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -28,6 +28,7 @@
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+
 void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
                                      intptr_t w, uint8_t *left);
 void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
                                       intptr_t w, uint8_t *left);
diff --git a/libavcodec/x86/huffyuvdsp_template.asm b/libavcodec/x86/huffyuvdsp_template.asm
new file mode 100644
index 0000000000..7e14542671
--- /dev/null
+++ b/libavcodec/x86/huffyuvdsp_template.asm
@@ -0,0 +1,76 @@
+;******************************************************************************
+;* SIMD-optimized HuffYUV functions
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
+    movd    m4, maskd
+    SPLATW  m4, m4
+    add     wd, wd
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+    push    tmpq
+%%.wordloop:
+    sub     wq, 2
+%ifidn %2, add
+    mov     tmpw, [srcq+wq]
+    add     tmpw, [dstq+wq]
+%else
+    mov     tmpw, [src1q+wq]
+    sub     tmpw, [src2q+wq]
+%endif
+    and     tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+    pop     tmpq
+%%.tomainloop:
+%ifidn %2, add
+    add     srcq, wq
+%else
+    add     src1q, wq
+    add     src2q, wq
+%endif
+    add     dstq, wq
+    neg     wq
+    jz %%.end
+%%.loop:
+%ifidn %2, add
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
+%else
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
+%endif
+    p%2w    m0, m1
+    p%2w    m2, m3
+    pand    m0, m4
+    pand    m2, m4
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
index eeef81ab8e..6a7f1a0f49 100644
--- a/libavcodec/x86/huffyuvencdsp.asm
+++ b/libavcodec/x86/huffyuvencdsp.asm
@@ -27,62 +27,10 @@ SECTION .text
 
+%include "libavcodec/x86/huffyuvdsp_template.asm"
+
 ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 ;                    unsigned mask, int w);
-%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-    movd    m4, maskd
-    SPLATW  m4, m4
-    add     wd, wd
-    test    wq, 2*mmsize - 1
-    jz %%.tomainloop
-    push    tmpq
-%%.wordloop:
-    sub     wq, 2
-%ifidn %2, add
-    mov     tmpw, [srcq+wq]
-    add     tmpw, [dstq+wq]
-%else
-    mov     tmpw, [src1q+wq]
-    sub     tmpw, [src2q+wq]
-%endif
-    and     tmpw, maskw
-    mov     [dstq+wq], tmpw
-    test    wq, 2*mmsize - 1
-    jnz %%.wordloop
-    pop     tmpq
-%%.tomainloop:
-%ifidn %2, add
-    add     srcq, wq
-%else
-    add     src1q, wq
-    add     src2q, wq
-%endif
-    add     dstq, wq
-    neg     wq
-    jz %%.end
-%%.loop:
-%ifidn %2, add
-    mov%1   m0, [srcq+wq]
-    mov%1   m1, [dstq+wq]
-    mov%1   m2, [srcq+wq+mmsize]
-    mov%1   m3, [dstq+wq+mmsize]
-%else
-    mov%1   m0, [src1q+wq]
-    mov%1   m1, [src2q+wq]
-    mov%1   m2, [src1q+wq+mmsize]
-    mov%1   m3, [src2q+wq+mmsize]
-%endif
-    p%2w    m0, m1
-    p%2w    m2, m3
-    pand    m0, m4
-    pand    m2, m4
-    mov%1   [dstq+wq]       , m0
-    mov%1   [dstq+wq+mmsize], m2
-    add     wq, 2*mmsize
-    jl %%.loop
-%%.end:
-    RET
-%endmacro
 
 %if ARCH_X86_32
 INIT_MMX mmx
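
For reference, the shared INT16_LOOP macro expands into the SIMD bodies of ff_add_int16 (in-place add) and ff_diff_int16 (difference of two sources), with each 16-bit sample masked to the active bit depth after the paddw/psubw. A minimal scalar C sketch of that behaviour follows; the function names are illustrative only and not the C reference implementations in the tree:

#include <stdint.h>

/* Scalar model of the "add" variant: dst[i] = (dst[i] + src[i]) & mask.
 * mask keeps only the valid sample bits, e.g. 0x03FF for 10-bit content. */
static void add_int16_scalar(uint16_t *dst, const uint16_t *src,
                             unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (dst[i] + src[i]) & mask;   /* paddw + pand, per word */
}

/* Scalar model of the "sub" variant: dst[i] = (src1[i] - src2[i]) & mask. */
static void diff_int16_scalar(uint16_t *dst, const uint16_t *src1,
                              const uint16_t *src2, unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (src1[i] - src2[i]) & mask; /* psubw + pand, per word */
}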