diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm index c3b559bb15..31a1c8b76f 100644 --- a/libavcodec/x86/sbrdsp.asm +++ b/libavcodec/x86/sbrdsp.asm @@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5 lea r0, [r0 + r3*8] neg r3 .loop4: - movq m0, [r2 + 4*r3 + 0] - movq m1, [r2 + 4*r3 + 8] - movq m2, [r1 + 0*STEP] - movq m3, [r1 + 2*STEP] + movlps m0, [r2 + 4*r3 + 0] + movlps m1, [r2 + 4*r3 + 8] + movlps m2, [r1 + 0*STEP] + movlps m3, [r1 + 2*STEP] movhps m2, [r1 + 1*STEP] movhps m3, [r1 + 3*STEP] - punpckldq m0, m0 - punpckldq m1, m1 + unpcklps m0, m0 + unpcklps m1, m1 mulps m0, m2 mulps m1, m3 movu [r0 + 8*r3 + 0], m0 @@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5 jz .end .loop1: ; element 0 and 1 can be computed at the same time movss m0, [r2] - movq m2, [r1] - punpckldq m0, m0 + movlps m2, [r1] + unpcklps m0, m0 mulps m2, m0 movlps [r0], m2 add r0, 8