From e1958604fdbf67aa8c300d3700c8805db039919d Mon Sep 17 00:00:00 2001
From: Fabrice Bellard <fabrice@bellard.org>
Date: Tue, 7 Jan 2003 17:41:43 +0000
Subject: [PATCH] Add HAVE_BUILTIN_VECTOR guard around the SSE builtins code; inverse FFT fix by Romain Dolbeau

Originally committed as revision 1410 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/i386/fft_sse.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c
index 8e8e36b0f7..175cea506c 100644
--- a/libavcodec/i386/fft_sse.c
+++ b/libavcodec/i386/fft_sse.c
@@ -19,11 +19,16 @@
 #include "../dsputil.h"
 #include <math.h>
 
+#ifdef HAVE_BUILTIN_VECTOR
+
 #include <xmmintrin.h>
 
 static const float p1p1p1m1[4] __attribute__((aligned(16))) = 
     { 1.0, 1.0, 1.0, -1.0 };
 
+static const float p1p1m1p1[4] __attribute__((aligned(16))) = 
+    { 1.0, 1.0, -1.0, 1.0 };
+
 static const float p1p1m1m1[4] __attribute__((aligned(16))) = 
     { 1.0, 1.0, -1.0, -1.0 };
 
@@ -54,6 +59,11 @@ void fft_calc_sse(FFTContext *s, FFTComplex *z)
         r = (__m128 *)&z[0];
         c1 = *(__m128 *)p1p1m1m1;
         c2 = *(__m128 *)p1p1p1m1;
+        if (s->inverse)
+            c2 = *(__m128 *)p1p1m1p1;
+        else
+            c2 = *(__m128 *)p1p1p1m1;
+
         j = (np >> 2);
         do {
             a = r[0];
@@ -126,3 +136,5 @@ void fft_calc_sse(FFTContext *s, FFTComplex *z)
         nloops = nloops << 1;
     } while (nblocks != 0);
 }
+
+#endif