From 4c9621ece7c7a5bd2da22ffbf64fcd730775275d Mon Sep 17 00:00:00 2001 From: atmos4 Date: Thu, 29 Nov 2001 18:05:42 +0000 Subject: [PATCH] Disable SSE code and re-enable the FPU DCT for SSE CPUs (the FPU code is 0.3% faster, and I can't get the data aligned in dct64_sse.s, so I can't finish optimizing it) git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@3204 b3059339-0415-0410-9bf9-f77b7e298cf2 --- mp3lib/Makefile | 8 ++++---- mp3lib/dct64_sse.s | 16 ++++++++++------ mp3lib/sr1.c | 12 ++++++------ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/mp3lib/Makefile b/mp3lib/Makefile index 5295eabab7..71247c3360 100644 --- a/mp3lib/Makefile +++ b/mp3lib/Makefile @@ -15,10 +15,10 @@ ifeq ($(TARGET_MMX),yes) SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.s OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o endif -ifeq ($(TARGET_SSE),yes) -SRCS += dct64_sse.s -OBJS += dct64_sse.o -endif +#ifeq ($(TARGET_SSE),yes) +#SRCS += dct64_sse.s +#OBJS += dct64_sse.o +#endif ifeq ($(TARGET_3DNOW),yes) SRCS += dct36_3dnow.s dct64_3dnow.s OBJS += dct36_3dnow.o dct64_3dnow.o diff --git a/mp3lib/dct64_sse.s b/mp3lib/dct64_sse.s index 922e1c881a..3bc74cc8c0 100644 --- a/mp3lib/dct64_sse.s +++ b/mp3lib/dct64_sse.s @@ -1,9 +1,13 @@ -# This code is a translation of dct64_k7.s from MPlayer. -# Coded by Felix Buenemann -# -# TODO: - fix phases 4 and 5 (sse) -# - optimize scalar FPU code? (interleave with sse code) -# +/ This code is a translation of dct64_k7.s from MPlayer. +/ Coded by Felix Buenemann +/ +/ TODO: - fix phases 4 and 5 (sse) +/ - optimize scalar FPU code? (interleave with sse code) +/ - fix alignment (prohibits finishing this code) +/ - then use faster insns for aligned data +/ +/ Note: currently code is disabled as I couldn't get input data aligned!
+/ //.data // .align 8 diff --git a/mp3lib/sr1.c b/mp3lib/sr1.c index a664c9dc88..601c94f211 100644 --- a/mp3lib/sr1.c +++ b/mp3lib/sr1.c @@ -409,8 +409,8 @@ void MP3_Init(){ Note: It's ok, Since K8 will have SSE2 support and will much faster of P4 ;) */ - printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":"")); -// printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":"")); +// printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":"")); + printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":"")); else if(_3dnow) printf( "mp3lib: Using AMD 3dnow%s! optimized decore.\n",(_3dnow>1?"-dsp(k7)":"")); @@ -443,14 +443,14 @@ void MP3_Init(){ tables_done_flag=1; dct36_func=dct36; -#ifdef HAVE_SSE +/*#ifdef HAVE_SSE if(_isse) { synth_func=synth_1to1_MMX; dct64_MMX_func=dct64_MMX_sse; } else -#endif +#endif*/ #ifdef HAVE_3DNOWEX if ( _3dnow > 1 ) { @@ -518,7 +518,7 @@ void MP3_Init(){ tables_done_flag=1; dct36_func=dct36; -#ifdef HAVE_SSE +/*#ifdef HAVE_SSE if(gCpuCaps.hasSSE) { synth_func=synth_1to1_MMX; @@ -526,7 +526,7 @@ void MP3_Init(){ printf("mp3lib: using SSE optimized decore!\n"); } else -#endif +#endif*/ #ifdef HAVE_3DNOWEX if (gCpuCaps.has3DNowExt) {