mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-03-06 04:28:38 +00:00
x86/dsputilenc: implement SSE2 version of diff_pixels
Signed-off-by: James Almer <jamrial@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
a0c5cd3475
commit
e64e079ece
@ -419,6 +419,31 @@ cglobal diff_pixels, 4,5
|
||||
jne .loop
|
||||
REP_RET
|
||||
|
||||
;-----------------------------------------------------------------------
; void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1,
;                          const uint8_t *s2, int stride);
;
; For an 8x8 group of bytes, stores (s1 - s2) widened to 16-bit words
; into block (64 words = 128 bytes, written contiguously).
;
; In:    r0 = block   (stored with mova, i.e. must be 16-byte aligned)
;        r1 = s1, r2 = s2   (rows are `stride` bytes apart)
;        r3 = stride
; Uses:  r4 as loop offset, m0-m3 as row data, m4 as zero
;-----------------------------------------------------------------------
INIT_XMM sse2
cglobal diff_pixels, 4, 5, 5
    movsxdifnidn r3, r3d                ; widen the int stride to pointer size where needed
    mov          r4, -128               ; r4 = negative byte offset into block, counts up to 0
    add          r0, 128                ; r0 = end of block, so [r0+r4] walks it front to back
    pxor         m4, m4                 ; m4 = 0: high bytes for the u8 -> u16 widening

.row_pair:                              ; each pass handles two rows (2 x 16 output bytes)
    ; Fetch 8 pixels from two consecutive rows of each source.
    movh         m0, [r1]               ; s1, row n
    movh         m1, [r1+r3]            ; s1, row n+1
    movh         m2, [r2]               ; s2, row n
    movh         m3, [r2+r3]            ; s2, row n+1

    ; Row n: zero-extend both operands to words, then subtract.
    punpcklbw    m0, m4
    punpcklbw    m2, m4
    psubw        m0, m2                 ; m0 = s1[n] - s2[n]

    ; Row n+1: same.
    punpcklbw    m1, m4
    punpcklbw    m3, m4
    psubw        m1, m3                 ; m1 = s1[n+1] - s2[n+1]

    ; Both stores are issued only after all four loads above.
    mova  [r0+r4],    m0
    mova  [r0+r4+16], m1

    lea          r1, [r1+r3*2]          ; advance both sources by two rows
    lea          r2, [r2+r3*2]          ; (lea leaves flags untouched)
    add          r4, 32                 ; next 32 output bytes; ZF set once r4 reaches 0
    jnz          .row_pair
    RET
|
||||
|
||||
INIT_MMX mmx
|
||||
; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
|
||||
cglobal pix_sum16, 2, 3
|
||||
|
@ -36,6 +36,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
|
||||
void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
|
||||
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
||||
int stride);
|
||||
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
|
||||
int stride);
|
||||
int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
|
||||
int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
|
||||
int ff_sum_abs_dctelem_mmx(int16_t *block);
|
||||
@ -971,6 +973,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
c->sse[0] = ff_sse16_sse2;
|
||||
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
|
||||
c->diff_pixels = ff_diff_pixels_sse2;
|
||||
|
||||
#if HAVE_ALIGNED_STACK
|
||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
|
||||
|
Loading…
Reference in New Issue
Block a user