mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/x86/lossless_videodsp: Port Loren's add_hfyu_left_prediction_ssse3/sse4 to 16bit
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 63d2be7533
commit 83b67ca056
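For reference, the routine being ported computes a HuffYUV-style left prediction over 16-bit samples: a running sum of src, wrapped to the stream's bit depth with mask after every step, stored to dst, with the final accumulator returned so the next call can continue from it. A minimal scalar sketch, assuming the same semantics as the existing C fallback (the _ref name below is only for illustration, not FFmpeg API):

#include <stdint.h>

/* Scalar sketch of the operation the SSSE3/SSE4 code in this commit
 * vectorizes: left prediction with a wraparound mask on 16-bit samples. */
static int add_hfyu_left_prediction_int16_ref(uint16_t *dst, const uint16_t *src,
                                              unsigned mask, int w, int acc)
{
    for (int i = 0; i < w; i++) {
        acc = (acc + src[i]) & mask;   /* accumulate and wrap to bit depth */
        dst[i] = acc;
    }
    return acc;                        /* carried into the next call as 'left' */
}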
@@ -1,5 +1,6 @@
;******************************************************************************
;* SIMD lossless video DSP utils
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2014 Michael Niedermayer
;*
;* This file is part of FFmpeg.
@@ -21,6 +22,13 @@
%include "libavutil/x86/x86util.asm"

SECTION_RODATA

pb_ef: times 8 db 14,15
pb_67: times 8 db 6, 7
pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11
pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7

SECTION_TEXT

%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
@@ -84,3 +92,79 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
    ADD_INT16_LOOP 1
.unaligned:
    ADD_INT16_LOOP 0

%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
    add     wq, wq
    add     srcq, wq
    add     dstq, wq
    neg     wq
%%.loop:
%if %2
    mova    m1, [srcq+wq]
%else
    movu    m1, [srcq+wq]
%endif
    mova    m2, m1
    pslld   m1, 16
    paddw   m1, m2
    mova    m2, m1

    pshufb  m1, m3
    paddw   m1, m2
    pshufb  m0, m5
%if mmsize == 16
    mova    m2, m1
    pshufb  m1, m4
    paddw   m1, m2
%endif
    paddw   m0, m1
    pand    m0, m7
%if %1
    mova    [dstq+wq], m0
%else
    movq    [dstq+wq], m0
    movhps  [dstq+wq+8], m0
%endif
    add     wq, mmsize
    jl %%.loop
    mov     eax, mmsize-1
    sub     eax, wd
    mov     wd, eax
    shl     wd, 8
    lea     eax, [wd+eax-1]
    movd    m1, eax
    pshufb  m0, m1
    movd    eax, m0
    RET
%endmacro

; int add_hfyu_left_prediction_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
INIT_MMX ssse3
cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
.skip_prologue:
    mova    m5, [pb_67]
    mova    m3, [pb_zzzz2323zzzzabab]
    movd    m0, leftm
    psllq   m0, 48
    movd    m7, maskm
    SPLATW  m7, m7
    ADD_HFYU_LEFT_LOOP_INT16 1, 1

INIT_XMM sse4
cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
    mova    m5, [pb_ef]
    mova    m4, [pb_zzzzzzzz67676767]
    mova    m3, [pb_zzzz2323zzzzabab]
    movd    m0, leftm
    pslldq  m0, 14
    movd    m7, maskm
    SPLATW  m7, m7
    test    srcq, 15
    jnz .src_unaligned
    test    dstq, 15
    jnz .dst_unaligned
    ADD_HFYU_LEFT_LOOP_INT16 1, 1
.dst_unaligned:
    ADD_HFYU_LEFT_LOOP_INT16 0, 1
.src_unaligned:
    ADD_HFYU_LEFT_LOOP_INT16 0, 0
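The heart of ADD_HFYU_LEFT_LOOP_INT16 is a log-step inclusive prefix sum across the 16-bit lanes of one register, built from pslld/pshufb/paddw and the two shuffle constants added to SECTION_RODATA. A hedged C-intrinsics sketch of that step for the SSE4 path (8 lanes; the helper name is hypothetical, not part of FFmpeg):

#include <tmmintrin.h>   /* SSSE3 for _mm_shuffle_epi8; the SSE2 ops come with it */

/* Log-step prefix sum over 8 uint16_t lanes, mirroring the pslld/pshufb/paddw
 * sequence in the macro above. The shuffle masks correspond to
 * pb_zzzz2323zzzzabab and pb_zzzzzzzz67676767; -1 bytes select zero. */
static __m128i prefix_sum_u16x8(__m128i v)
{
    const __m128i dup_pair_total = _mm_setr_epi8(-1,-1,-1,-1, 2, 3, 2, 3,
                                                 -1,-1,-1,-1,10,11,10,11);
    const __m128i dup_half_total = _mm_setr_epi8(-1,-1,-1,-1,-1,-1,-1,-1,
                                                  6, 7, 6, 7, 6, 7, 6, 7);

    v = _mm_add_epi16(v, _mm_slli_epi32(v, 16));                /* w1 += w0, w3 += w2, w5 += w4, w7 += w6 */
    v = _mm_add_epi16(v, _mm_shuffle_epi8(v, dup_pair_total));  /* w2,w3 += w0+w1; w6,w7 += w4+w5 */
    v = _mm_add_epi16(v, _mm_shuffle_epi8(v, dup_half_total));  /* w4..w7 += w0+w1+w2+w3 */
    return v;
}

In the asm, the previous iteration's running total (broadcast into m0 by the pshufb with pb_ef, or pb_67 on the 4-lane SSSE3/MMX path, which also skips the third step) is then added to every lane, and the result is wrapped with the bit-depth mask kept in m7.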
@@ -23,6 +23,8 @@
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);

void ff_llviddsp_init_x86(LLVidDSPContext *c)
{
@@ -35,4 +37,12 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->add_int16 = ff_add_int16_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        c->add_hfyu_left_prediction_int16 = ff_add_hfyu_left_prediction_int16_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->add_hfyu_left_prediction_int16 = ff_add_hfyu_left_prediction_int16_sse4;
    }
}