From 6eda85e15b38863a627fd0602098aa3250174698 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Wed, 7 Mar 2012 21:35:13 +0100
Subject: [PATCH] sbrdsp.asm: convert all instructions to float/SSE ones.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the values are floats, using the float operations
makes sense, improves performance on some CPUs and
makes the code SSE compatible instead of needing SSE2.

Based on suggestion by Jason.

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/sbrdsp.asm | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c3b559bb15..31a1c8b76f 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     lea         r0, [r0 + r3*8]
     neg         r3
 .loop4:
-    movq        m0, [r2 + 4*r3 + 0]
-    movq        m1, [r2 + 4*r3 + 8]
-    movq        m2, [r1 + 0*STEP]
-    movq        m3, [r1 + 2*STEP]
+    movlps      m0, [r2 + 4*r3 + 0]
+    movlps      m1, [r2 + 4*r3 + 8]
+    movlps      m2, [r1 + 0*STEP]
+    movlps      m3, [r1 + 2*STEP]
     movhps      m2, [r1 + 1*STEP]
     movhps      m3, [r1 + 3*STEP]
-    punpckldq   m0, m0
-    punpckldq   m1, m1
+    unpcklps    m0, m0
+    unpcklps    m1, m1
     mulps       m0, m2
     mulps       m1, m3
     movu        [r0 + 8*r3 +  0], m0
@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     jz          .end
 .loop1: ; element 0 and 1 can be computed at the same time
     movss       m0, [r2]
-    movq        m2, [r1]
-    punpckldq   m0, m0
+    movlps      m2, [r1]
+    unpcklps    m0, m0
     mulps       m2, m0
     movlps    [r0], m2
     add         r0, 8