avcodec/x86/lossless_videodsp: add diff_int16_mmx/sse2

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2014-01-22 19:41:21 +01:00
parent 7b89e24151
commit 631939bde6
2 changed files with 70 additions and 0 deletions

View File

@ -93,6 +93,72 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
.unaligned:
ADD_INT16_LOOP 0
;------------------------------------------------------------------------------
; DIFF_INT16_LOOP is_aligned
;
; Function body for diff_int16:
;     dst[i] = (src1[i] - src2[i]) & mask      for i in [0, w), 16-bit words
;
; Expects the enclosing cglobal to name its register arguments
; dst, src1, src2, mask, w.
;   %1 = 1 -> vector loop uses mova
;   %1 = 0 -> vector loop uses movu
;
; Modifies: m0-m4, dst, src1, src2, w, flags.  r5 is used as scalar scratch
; and is saved/restored around that use.  Ends with RET.
;------------------------------------------------------------------------------
%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
    movd    m4, maskd
    SPLATW  m4, m4                  ; m4 = low word of mask in every word lane
    ; w is a C int.  Doubling it with a 32-bit operation also clears the upper
    ; half of the register on x86-64, where those bits are not guaranteed to
    ; be valid on entry; wq is used as an address offset from here on.
    add     wd, wd                  ; w = length in bytes
    test    wq, 2*mmsize - 1
    jz %%.tomainloop
    ; Scalar tail: peel words off the end until the remaining byte count is a
    ; multiple of 2*mmsize.  The scratch register must not be ax: x86inc maps
    ; r0 (dst) to eax on x86-32, so writing ax would corrupt the dst pointer.
    ; r5 is not one of the five argument registers; it is callee-saved on
    ; x86-32, hence the push/pop.
    push    r5
%%.wordloop:
    sub     wq, 2
    mov     r5w, [src1q+wq]
    sub     r5w, [src2q+wq]
    and     r5w, maskw
    mov     [dstq+wq], r5w
    test    wq, 2*mmsize - 1
    jnz %%.wordloop
    pop     r5
%%.tomainloop:
    ; Point all three pointers at the end of the vectorised region and walk a
    ; negative byte offset up towards zero.
    add     src1q, wq
    add     src2q, wq
    add     dstq, wq
    neg     wq
    jz %%.end                       ; nothing left for the vector loop
%%.loop:
%if %1
    mova    m0, [src1q+wq]
    mova    m1, [src2q+wq]
    mova    m2, [src1q+wq+mmsize]
    mova    m3, [src2q+wq+mmsize]
%else
    movu    m0, [src1q+wq]
    movu    m1, [src2q+wq]
    movu    m2, [src1q+wq+mmsize]
    movu    m3, [src2q+wq+mmsize]
%endif
    psubw   m0, m1
    psubw   m2, m3
    pand    m0, m4
    pand    m2, m4
%if %1
    mova    [dstq+wq]       , m0
    mova    [dstq+wq+mmsize], m2
%else
    movu    [dstq+wq]       , m0
    movu    [dstq+wq+mmsize], m2
%endif
    add     wq, 2*mmsize            ; two vectors per iteration
    jl %%.loop
%%.end:
    RET
%endmacro
; MMX instantiation of diff_int16 (arguments: dst, src1, src2, mask, w).
; The whole body comes from DIFF_INT16_LOOP, expanded with %1 = 1 so the
; vector loop uses mova; the macro also supplies the final RET.
; NOTE(review): unlike the SSE2 version there is no pointer alignment check
; here - presumably because mova on MMX registers has no alignment
; requirement; confirm.
INIT_MMX mmx
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
DIFF_INT16_LOOP 1
; SSE2 instantiation of diff_int16 (arguments: dst, src1, src2, mask, w).
; Two copies of the loop body are emitted: the mova variant is entered only
; when dst, src1 and src2 are all multiples of mmsize; a single misaligned
; pointer diverts to the movu variant.  Each expansion ends in its own RET,
; so the first never falls through into the second.
INIT_XMM sse2
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
    test    dstq, mmsize-1
    jnz     .misaligned
    test    src2q, mmsize-1
    jnz     .misaligned
    test    src1q, mmsize-1
    jnz     .misaligned
    DIFF_INT16_LOOP 1
.misaligned:
    DIFF_INT16_LOOP 0
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
add wq, wq
add srcq, wq

View File

@ -23,6 +23,8 @@
void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
/* x86 assembly implementations of diff_int16: for each of the w 16-bit
 * elements, dst[i] = (src1[i] - src2[i]) & mask (low 16 bits of mask). */
void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
@ -32,10 +34,12 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
if (EXTERNAL_MMX(cpu_flags)) {
c->add_int16 = ff_add_int16_mmx;
c->diff_int16 = ff_diff_int16_mmx;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_int16 = ff_add_int16_sse2;
c->diff_int16 = ff_diff_int16_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {