mpv/mp3lib/decod386.c

/*
 * Mpeg Layer-1,2,3 audio decoder
 * ------------------------------
 * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved.
 * See also 'README'
 *
 * slightly optimized for machines without autoincrement/decrement.
 * The performance is highly compiler dependent. Maybe
 * the decode.c version for 'normal' processors may be faster
 * even for Intel processors.
 */


#include "../config.h"

#if 0
 /* old WRITE_SAMPLE */
   /* is portable */
#define WRITE_SAMPLE(samples,sum,clip) {			\
  if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; }	\
  else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; }\
  else { *(samples) = sum;  }					\
}
#else
 /* new WRITE_SAMPLE */

/*
 * Should give the same clipping as the "old WRITE_SAMPLE" macro above, but
 * uses some tricks to avoid double->int conversions and floating point
 * compares.  (Unlike a plain C conversion, the result is rounded by the
 * floating point addition instead of being truncated.)
 *
 * Here's how it works:
 * ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is 2^52 + 2^31,
 * i.e. 0x0010000080000000LL in hex.  The macro computes that constant + sum
 * as an IEEE double and extracts the low-order 32 bits of the stored
 * representation.  The 2^52 term pushes the value into the range where one
 * unit in the last place of the mantissa is exactly 1, so the integer part
 * of "sum" lands in the least significant mantissa bits, biased by 2^31.
 * Removing the 2^31 bias (by flipping the top bit of the 32-bit word)
 * leaves "sum" converted to a 32-bit int value.
 *
 * The word is read through a union rather than through a casted pointer so
 * that the access does not break the C aliasing rules, and the bias is
 * removed on a 32-bit quantity *before* any widening.  The former
 * "long v = int - 0x80000000" form did the subtraction in unsigned int and
 * then widened the result without sign, which turned every negative sample
 * into a large positive one (and clipped it to +32767) wherever long is
 * wider than int.
 *
 * (Is that really faster than the clean and simple old version of the macro?)
 */

/*
 * On a SPARC cpu, we fetch the low-order 32 bits from the second 32-bit
 * word of the double fp value stored in memory.  On an x86 cpu, we fetch
 * them from the first 32-bit word.
 * I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory
 * layouts of double floating point values on all cpu architectures.  If
 * it doesn't work for you, just enable the "old WRITE_SAMPLE" macro.
 */
#if WORDS_BIGENDIAN
#define	MANTISSA_OFFSET	1
#else
#define	MANTISSA_OFFSET	0
#endif

   /* requires sizeof(int) == 4 and sizeof(double) == 8 */
   /* samples: pointer to the destination sample; sum: value to store; */
   /* clip: integer lvalue, incremented whenever the value was clipped  */
#define WRITE_SAMPLE(samples,sum,clip) { \
  union { double d; unsigned int w[2]; } ws_pun; \
  int ws_val; \
  ws_pun.d = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\
  ws_val = (int)(ws_pun.w[MANTISSA_OFFSET] ^ 0x80000000u); \
  if( ws_val > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( ws_val < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = ws_val; } \
}
#endif


/*
#define WRITE_SAMPLE(samples,sum,clip) { \
  double dtemp; int v;                    \
  dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\
  v = ((*(int *)&dtemp) - 0x80000000); \
  if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = v; } \
}
*/

/*
 * Mono output: run the channel-0 synthesis into a private scratch buffer,
 * then copy every second short of that buffer into the caller's buffer as
 * 32 consecutive 2-byte slots starting at byte offset *pnt.
 * *pnt is advanced by 64; the value returned by synth_1to1() is passed on.
 */
static int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt)
{
  short scratch[64];
  unsigned char *dst;
  int scratch_pos = 0;
  int clipped;
  int n;

  /* the scratch buffer has its own position counter, *pnt is not touched */
  clipped = synth_1to1(bandPtr,0,(unsigned char *) scratch,&scratch_pos);

  dst = samples + *pnt;
  for(n=0;n<32;n++) {
    /* source advances two shorts, destination two bytes per sample */
    *( (short *) (dst + 2*n)) = scratch[2*n];
  }

  *pnt += 64;
  return clipped;
}


/*
 * Mono source, two-slot output: run the channel-0 synthesis straight into
 * the caller's buffer, then walk the 128 bytes that end at the updated
 * *pnt and copy the first short of every 4-byte group into the second.
 * Returns what synth_1to1() returned.
 */
static int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt)
{
  unsigned char *frame;
  unsigned char *stop;
  int clipped;

  clipped = synth_1to1(bandPtr,0,samples,pnt);

  /* step back over the block that was just produced */
  frame = samples + *pnt - 128;
  stop = frame + 32*4;

  while(frame != stop) {
    short *pair = (short *) frame;
    pair[1] = pair[0];
    frame += 4;
  }

  return clipped;
}


#ifdef USE_FAKE_MONO
/*
 * Run synth_1to1() for the given channel, then, in each of the 32 4-byte
 * groups of the 128 bytes ending at the updated *pnt, overwrite the second
 * short with the first.  Returns what synth_1to1() returned.
 */
static int synth_1to1_l(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  unsigned char *base;
  int clipped;
  int k;

  clipped = synth_1to1(bandPtr,channel,out,pnt);
  base = out + *pnt - 128;

  for(k=0;k<32;k++) {
    short *pair = (short *) (base + 4*k);
    pair[1] = pair[0];
  }

  return clipped;
}


/*
 * Run synth_1to1() for the given channel, then, in each of the 32 4-byte
 * groups of the 128 bytes ending at the updated *pnt, overwrite the first
 * short with the second.  Returns what synth_1to1() returned.
 */
static int synth_1to1_r(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  unsigned char *base;
  int clipped;
  int k;

  clipped = synth_1to1(bandPtr,channel,out,pnt);
  base = out + *pnt - 128;

  for(k=0;k<32;k++) {
    short *pair = (short *) (base + 4*k);
    pair[0] = pair[1];
  }

  return clipped;
}
#endif

/*
 * Optional replacement synthesis routine.  When the file is built with
 * ARCH_X86 and this pointer is non-null, synth_1to1() calls it with
 * (bandPtr, channel, samples) instead of running its own C code and
 * returns its result.
 */
synth_func_t synth_func;

#ifdef HAVE_MMX
/*
 * Thin wrapper around synth_1to1_MMX_s(): supplies the statically
 * allocated work buffer and offset that persist between calls and
 * forwards the caller's arguments unchanged.  Always returns 0.
 * NOTE(review): presumably synth_1to1_MMX_s is the assembler
 * implementation and updates the offset itself - confirm there.
 */
int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
{
    static short history[2][2][0x110];
    static int offset = 1;

    synth_1to1_MMX_s(bandPtr, channel, samples, &history[0][0][0], &offset);

    return 0;
}
#endif
/*
 * Produce 32 output samples for one channel from the values in bandPtr.
 *
 * bandPtr - input, handed unchanged to dct64() (or to synth_func)
 * channel - 0 selects buffs[0] and steps the shared offset bo;
 *           non-zero selects buffs[1] and starts writing one short later
 * out,pnt - samples are stored as shorts beginning at byte offset *pnt
 *           inside out, one every `step` (2) shorts; *pnt is advanced
 *           by 128 before returning
 *
 * Returns the number of samples WRITE_SAMPLE had to clip.  When built with
 * ARCH_X86 and synth_func is set, the work is delegated and the value
 * returned by synth_func is passed on instead.
 *
 * The history (buffs) and the running offset (bo) live in static storage
 * and are carried over from call to call.
 */
static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
  static real buffs[2][2][0x110];
  static const int step = 2;
  static int bo = 1;
  short *samples = (short *) (out + *pnt);

  real *b0,(*buf)[0x110];
  int clip = 0;
  int bo1;
#ifdef ARCH_X86
  /* A replacement routine is installed: let it do everything.  It gets the
     samples pointer before any per-channel adjustment is applied. */
  if ( synth_func )
   {
    int ret;
    ret=(*synth_func)( bandPtr,channel,samples);
    *pnt+=128;
    return ret;
   }
#endif
  /* bo is stepped backwards modulo 16 only on channel 0; a following call
     for another channel therefore works with the same offset. */
  if(!channel) {     /* channel=0 */
    bo--;
    bo &= 0xf;
    buf = buffs[0];
  }
  else {
    samples++;
    buf = buffs[1];
  }

  /* The parity of bo decides which half of buf the windowing below reads
     (b0) and which offsets are handed to dct64().
     NOTE(review): presumably dct64() stores its results through its first
     two arguments - confirm against dct64(). */
  if(bo & 0x1) {
    b0 = buf[0];
    bo1 = bo;
    dct64(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);
  }
  else {
    b0 = buf[1];
    bo1 = bo+1;
    dct64(buf[0]+bo,buf[1]+bo+1,bandPtr);
  }

  {
    register int j;
    /* window walks through the decwin table, starting at 16 - bo1 */
    real *window = decwin + 16 - bo1;

    /* First 16 samples: 16-term sum with alternating signs; b0 moves
       forward 0x10 and window 0x20 entries per sample. */
    for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)
    {
      real sum;
      sum  = window[0x0] * b0[0x0];
      sum -= window[0x1] * b0[0x1];
      sum += window[0x2] * b0[0x2];
      sum -= window[0x3] * b0[0x3];
      sum += window[0x4] * b0[0x4];
      sum -= window[0x5] * b0[0x5];
      sum += window[0x6] * b0[0x6];
      sum -= window[0x7] * b0[0x7];
      sum += window[0x8] * b0[0x8];
      sum -= window[0x9] * b0[0x9];
      sum += window[0xA] * b0[0xA];
      sum -= window[0xB] * b0[0xB];
      sum += window[0xC] * b0[0xC];
      sum -= window[0xD] * b0[0xD];
      sum += window[0xE] * b0[0xE];
      sum -= window[0xF] * b0[0xF];

      WRITE_SAMPLE(samples,sum,clip);
    }

    /* 17th sample: only the even-indexed terms are used, all added.
       Afterwards b0 and window take their first step backwards. */
    {
      real sum;
      sum  = window[0x0] * b0[0x0];
      sum += window[0x2] * b0[0x2];
      sum += window[0x4] * b0[0x4];
      sum += window[0x6] * b0[0x6];
      sum += window[0x8] * b0[0x8];
      sum += window[0xA] * b0[0xA];
      sum += window[0xC] * b0[0xC];
      sum += window[0xE] * b0[0xE];
      WRITE_SAMPLE(samples,sum,clip);
      b0-=0x10,window-=0x20,samples+=step;
    }
    /* re-base window for the backward pass, which indexes it downwards */
    window += bo1<<1;

    /* Remaining 15 samples: b0 and window move backwards, window is read
       with descending indices and every term is subtracted. */
    for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)
    {
      real sum;
      sum = -window[-0x1] * b0[0x0];
      sum -= window[-0x2] * b0[0x1];
      sum -= window[-0x3] * b0[0x2];
      sum -= window[-0x4] * b0[0x3];
      sum -= window[-0x5] * b0[0x4];
      sum -= window[-0x6] * b0[0x5];
      sum -= window[-0x7] * b0[0x6];
      sum -= window[-0x8] * b0[0x7];
      sum -= window[-0x9] * b0[0x8];
      sum -= window[-0xA] * b0[0x9];
      sum -= window[-0xB] * b0[0xA];
      sum -= window[-0xC] * b0[0xB];
      sum -= window[-0xD] * b0[0xC];
      sum -= window[-0xE] * b0[0xD];
      sum -= window[-0xF] * b0[0xE];
      sum -= window[-0x0] * b0[0xF];

      WRITE_SAMPLE(samples,sum,clip);
    }
  }
  /* 16 + 1 + 15 = 32 samples were written; advance the caller's position */
  *pnt += 128;

  return clip;

}
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`/*`
			`* Mpeg Layer-1,2,3 audio decoder`
			`* ------------------------------`
			`* copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved.`
			`* See also 'README'`
			`*`
			`* slighlty optimized for machines without autoincrement/decrement.`
			`* The performance is highly compiler dependend. Maybe`
			`* the decode.c version for 'normal' processor may be faster`
			`* even for Intel processors.`
			`*/`


mp3 audio decoding didn't work on big-endian architectures git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1319 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-14 16:52:09 +00:00			`#include "../config.h"`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00
mp3 audio decoding didn't work on big-endian architectures git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1319 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-14 16:52:09 +00:00			`#if 0`
			`/* old WRITE_SAMPLE */`
			`/* is portable */`
			`#define WRITE_SAMPLE(samples,sum,clip) { \`
			`if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \`
			`else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; }\`
			`else { *(samples) = sum; } \`
			`}`
			`#else`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`/* new WRITE_SAMPLE */`
mp3 audio decoding didn't work on big-endian architectures git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1319 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-14 16:52:09 +00:00
			`/*`
			`* should be the same as the "old WRITE_SAMPLE" macro above, but uses`
			`* some tricks to avoid double->int conversions and floating point compares.`
			`*`
			`* Here's how it works:`
			`* ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) is`
			`* 0x0010000080000000LL in hex. It computes 0x0010000080000000LL + sum`
			`* as a double IEEE fp value and extracts the low-order 32-bits from the`
			`* IEEE fp representation stored in memory. The 2^56 bit in the constant`
			`* is intended to force the bits of "sum" into the least significant bits`
			`* of the double mantissa. After an integer substraction of 0x80000000`
			`* we have the original double value "sum" converted to an 32-bit int value.`
			`*`
			`* (Is that really faster than the clean and simple old version of the macro?)`
			`*/`

			`/*`
			`* On a SPARC cpu, we fetch the low-order 32-bit from the second 32-bit`
			`* word of the double fp value stored in memory. On an x86 cpu, we fetch it`
			`* from the first 32-bit word.`
			`* I'm not sure if the WORDS_BIGENDIAN feature test covers all possible memory`
			`* layouts of double floating point values an all cpu architectures. If`
			`* it doesn't work for you, just enable the "old WRITE_SAMPLE" macro.`
			`*/`
			`#if WORDS_BIGENDIAN`
			`#define MANTISSA_OFFSET 1`
			`#else`
			`#define MANTISSA_OFFSET 0`
			`#endif`

Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`/* sizeof(int) == 4 */`
			`#define WRITE_SAMPLE(samples,sum,clip) { \`
			`double dtemp; long v; \`
			`dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\`
mp3 audio decoding didn't work on big-endian architectures git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1319 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-14 16:52:09 +00:00			`v = (((int *)&dtemp)[MANTISSA_OFFSET] - 0x80000000); \`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \`
			`else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \`
			`else { *(samples) = v; } \`
			`}`
mp3 audio decoding didn't work on big-endian architectures git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1319 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-14 16:52:09 +00:00			`#endif`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00

			`/*`
			`#define WRITE_SAMPLE(samples,sum,clip) { \`
			`double dtemp; int v; \`
			`dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\`
			`v = (((int )&dtemp) - 0x80000000); \`
			`if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \`
			`else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \`
			`else { *(samples) = v; } \`
			`}`
			`*/`

			`static int synth_1to1_mono(real bandPtr,unsigned char samples,int *pnt)`
			`{`
			`short samples_tmp[64];`
			`short *tmp1 = samples_tmp;`
			`int i,ret;`
			`int pnt1 = 0;`

			`ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1);`
			`samples += *pnt;`

			`for(i=0;i<32;i++) {`
			`( (short ) samples) = *tmp1;`
			`samples += 2;`
			`tmp1 += 2;`
			`}`
			`*pnt += 64;`

			`return ret;`
			`}`


			`static int synth_1to1_mono2stereo(real bandPtr,unsigned char samples,int *pnt)`
			`{`
			`int i,ret;`

			`ret = synth_1to1(bandPtr,0,samples,pnt);`
			`samples = samples + *pnt - 128;`

			`for(i=0;i<32;i++) {`
			`((short )samples)[1] = ((short )samples)[0];`
			`samples+=4;`
			`}`

			`return ret;`
			`}`


applied 'fakemono' patch by Bryan Chan scorpio@acm.org git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@733 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-05-08 21:49:00 +00:00			`#ifdef USE_FAKE_MONO`
			`static int synth_1to1_l(real bandPtr,int channel,unsigned char out,int *pnt)`
			`{`
			`int i,ret;`

			`ret = synth_1to1(bandPtr,channel,out,pnt);`
			`out = out + *pnt - 128;`

			`for(i=0;i<32;i++) {`
			`((short )out)[1] = ((short )out)[0];`
			`out+=4;`
			`}`

			`return ret;`
			`}`


			`static int synth_1to1_r(real bandPtr,int channel,unsigned char out,int *pnt)`
			`{`
			`int i,ret;`

			`ret = synth_1to1(bandPtr,channel,out,pnt);`
			`out = out + *pnt - 128;`

			`for(i=0;i<32;i++) {`
			`((short )out)[0] = ((short )out)[1];`
			`out+=4;`
			`}`

			`return ret;`
			`}`
			`#endif`

Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1246 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-06-29 17:55:35 +00:00			`synth_func_t synth_func;`

Portability and old binutils support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1259 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-03 07:50:52 +00:00			`#ifdef HAVE_MMX`
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1246 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-06-29 17:55:35 +00:00			`int synth_1to1_MMX( real bandPtr,int channel,short samples)`
			`{`
			`static short buffs[2][2][0x110];`
			`static int bo = 1;`
			`synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo);`
			`return 0;`
Portability and old binutils support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1259 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-03 07:50:52 +00:00			`}`
			`#endif`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`static int synth_1to1(real bandPtr,int channel,unsigned char out,int *pnt)`
			`{`
			`static real buffs[2][2][0x110];`
			`static const int step = 2;`
			`static int bo = 1;`
			`short samples = (short ) (out + *pnt);`

			`real b0,(buf)[0x110];`
			`int clip = 0;`
			`int bo1;`
Portability and old binutils support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1259 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-03 07:50:52 +00:00			`#ifdef ARCH_X86`
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1246 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-06-29 17:55:35 +00:00			`if ( synth_func )`
K7 3dnow-dsp support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@737 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-05-09 07:59:55 +00:00			`{`
			`int ret;`
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1246 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-06-29 17:55:35 +00:00			`ret=(*synth_func)( bandPtr,channel,samples);`
K7 3dnow-dsp support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@737 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-05-09 07:59:55 +00:00			`*pnt+=128;`
			`return ret;`
			`}`
Portability and old binutils support git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1259 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-07-03 07:50:52 +00:00			`#endif`
Initial revision git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@2 b3059339-0415-0410-9bf9-f77b7e298cf2 2001-02-24 20:28:24 +00:00			`if(!channel) { /* channel=0 */`
			`bo--;`
			`bo &= 0xf;`
			`buf = buffs[0];`
			`}`
			`else {`
			`samples++;`
			`buf = buffs[1];`
			`}`

			`if(bo & 0x1) {`
			`b0 = buf[0];`
			`bo1 = bo;`
			`dct64(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);`
			`}`
			`else {`
			`b0 = buf[1];`
			`bo1 = bo+1;`
			`dct64(buf[0]+bo,buf[1]+bo+1,bandPtr);`
			`}`

			`{`
			`register int j;`
			`real *window = decwin + 16 - bo1;`

			`for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)`
			`{`
			`real sum;`
			`sum = window[0x0] * b0[0x0];`
			`sum -= window[0x1] * b0[0x1];`
			`sum += window[0x2] * b0[0x2];`
			`sum -= window[0x3] * b0[0x3];`
			`sum += window[0x4] * b0[0x4];`
			`sum -= window[0x5] * b0[0x5];`
			`sum += window[0x6] * b0[0x6];`
			`sum -= window[0x7] * b0[0x7];`
			`sum += window[0x8] * b0[0x8];`
			`sum -= window[0x9] * b0[0x9];`
			`sum += window[0xA] * b0[0xA];`
			`sum -= window[0xB] * b0[0xB];`
			`sum += window[0xC] * b0[0xC];`
			`sum -= window[0xD] * b0[0xD];`
			`sum += window[0xE] * b0[0xE];`
			`sum -= window[0xF] * b0[0xF];`

			`WRITE_SAMPLE(samples,sum,clip);`
			`}`

			`{`
			`real sum;`
			`sum = window[0x0] * b0[0x0];`
			`sum += window[0x2] * b0[0x2];`
			`sum += window[0x4] * b0[0x4];`
			`sum += window[0x6] * b0[0x6];`
			`sum += window[0x8] * b0[0x8];`
			`sum += window[0xA] * b0[0xA];`
			`sum += window[0xC] * b0[0xC];`
			`sum += window[0xE] * b0[0xE];`
			`WRITE_SAMPLE(samples,sum,clip);`
			`b0-=0x10,window-=0x20,samples+=step;`
			`}`
			`window += bo1<<1;`

			`for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)`
			`{`
			`real sum;`
			`sum = -window[-0x1] * b0[0x0];`
			`sum -= window[-0x2] * b0[0x1];`
			`sum -= window[-0x3] * b0[0x2];`
			`sum -= window[-0x4] * b0[0x3];`
			`sum -= window[-0x5] * b0[0x4];`
			`sum -= window[-0x6] * b0[0x5];`
			`sum -= window[-0x7] * b0[0x6];`
			`sum -= window[-0x8] * b0[0x7];`
			`sum -= window[-0x9] * b0[0x8];`
			`sum -= window[-0xA] * b0[0x9];`
			`sum -= window[-0xB] * b0[0xA];`
			`sum -= window[-0xC] * b0[0xB];`
			`sum -= window[-0xD] * b0[0xC];`
			`sum -= window[-0xE] * b0[0xD];`
			`sum -= window[-0xF] * b0[0xE];`
			`sum -= window[-0x0] * b0[0xF];`

			`WRITE_SAMPLE(samples,sum,clip);`
			`}`
			`}`
			`*pnt += 128;`

			`return clip;`

			`}`