From 395f2e70dd26524cb82d412cb938ded508df4d42 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 6 Nov 2011 20:43:13 -0500 Subject: [PATCH] dsputil: use movups instead of movdqu in ff_emu_edge_core_sse() This allows emulated_edge_mc_sse() and gmc_sse() to be used under AV_CPU_FLAG_SSE. --- libavcodec/x86/dsputil_mmx.c | 8 ++++---- libavcodec/x86/dsputil_yasm.asm | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index f0de05a763..104bd7595f 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2874,6 +2874,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->scalarproduct_float = ff_scalarproduct_float_sse; c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; + + if (!high_bit_depth) + c->emulated_edge_mc = emulated_edge_mc_sse; + c->gmc = gmc_sse; #endif } if (HAVE_AMD3DNOW && (mm_flags & AV_CPU_FLAG_3DNOW)) @@ -2894,10 +2898,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->apply_window_int16 = ff_apply_window_int16_sse2; } } - - if (!high_bit_depth) - c->emulated_edge_mc = emulated_edge_mc_sse; - c->gmc= gmc_sse; #endif } if (mm_flags & AV_CPU_FLAG_SSSE3) { diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm index f2894cd501..8723a7e0b0 100644 --- a/libavcodec/x86/dsputil_yasm.asm +++ b/libavcodec/x86/dsputil_yasm.asm @@ -637,7 +637,7 @@ cglobal emu_edge_core_%1, 2, 7, 0 %ifnidn %3, mmx %rep %2/16 - movdqu xmm %+ %%sxidx, [r1+%%src_off] + movups xmm %+ %%sxidx, [r1+%%src_off] %assign %%src_off %%src_off+16 %assign %%sxidx %%sxidx+1 %endrep ; %2/16 @@ -686,7 +686,7 @@ cglobal emu_edge_core_%1, 2, 7, 0 %ifnidn %3, mmx %rep %2/16 - movdqu [r0+%%dst_off], xmm %+ %%dxidx + movups [r0+%%dst_off], xmm %+ %%dxidx %assign %%dst_off %%dst_off+16 %assign %%dxidx %%dxidx+1 %endrep ; %2/16 @@ -915,7 +915,7 @@ ALIGN 64 %define linesize r2m V_COPY_NPX %1, mm0, movq, 8, 0xFFFFFFF8 %else ; !mmx - V_COPY_NPX %1, xmm0, movdqu, 16, 0xFFFFFFF0 + V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0 %ifdef ARCH_X86_64 %define linesize r2 V_COPY_NPX %1, rax , mov, 8