git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4148 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
nick 2002-01-14 09:32:51 +00:00
parent 8629e9e819
commit 2da69665f6
3 changed files with 308 additions and 322 deletions

View File

@ -9,7 +9,7 @@ OPTFLAGS := $(OPTFLAGS:-O4=-O0)
endif
CFLAGS = $(OPTFLAGS) $(EXTRA_INC)
ifeq ($(TARGET_ARCH_X86),yes)
SRCS += d_cpu.s decode_i586.s
SRCS += d_cpu.s decode_i586.c
OBJS += d_cpu.o decode_i586.o
ifeq ($(TARGET_MMX),yes)
SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c

307
mp3lib/decode_i586.c Normal file
View File

@ -0,0 +1,307 @@
/*
* mpg123_synth_1to1 works the same way as the c version of this
* file. only two types of changes have been made:
* - reordered floating point instructions to
* prevent pipeline stalls
* - made WRITE_SAMPLE use integer instead of
* (slower) floating point
* all kinds of x86 processors should benefit from these
* modifications.
*
* useful sources of information on optimizing x86 code include:
*
* Intel Architecture Optimization Manual
* http://www.intel.com/design/pentium/manuals/242816.htm
*
* Cyrix 6x86 Instruction Set Summary
* ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
*
* AMD-K5 Processor Software Development
* http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
*
* Stefan Bieschewski <stb@acm.org>
*
* $Id$
*/
#define real float /* ugly - but only way */
/*
 * State shared by all calls to synth_1to1_pent().
 *
 * buffs: work area of 1088 longs. The assembly addresses it by symbol name,
 *        using `buffs` for channel 0 and `buffs+2176` (a byte offset) for
 *        any other channel. The assembly file this replaces reserved
 *        4352 bytes, i.e. this assumes a 4-byte long - TODO confirm on any
 *        new target.
 * bo:    position value kept in the range 0..15; stepped back by one
 *        (mod 16) on every channel-0 call.
 *
 * Both objects are referenced only from inside an asm string, which the
 * compiler cannot see. `used` forces them to be emitted so the symbol
 * references in the assembly still resolve when optimizing.
 */
static long buffs[1088] __attribute__((used));
static long bo __attribute__((used)) = 1;
/*
 * synth_1to1_pent - hand-scheduled x86 (32-bit) synthesis routine.
 *
 * bandPtr: passed on unchanged as the first-pushed argument of dct64().
 * channel: 0 steps the shared position `bo` back by one (mod 16) and works
 *          in `buffs`; any other value leaves `bo` untouched, works in
 *          `buffs+2176` and starts writing one short further into `samples`.
 * samples: output buffer. 32 16-bit values are stored (16 + 1 + 15), each
 *          4 bytes after the previous one.
 *
 * Returns the number of stored values that had to be clamped to
 * -32768 / 32767.
 *
 * tmp[0] is used by the assembly as a scratch slot that survives the
 * dct64() call.
 *
 * NOTE(review): %ebp is overwritten early on and "m" operands are still used
 * afterwards, and `4+%4` / `8+%4` are used after one / two pushes to reach
 * the same slot as `%4`. This only works if the compiler addresses the
 * operands relative to %esp (no frame pointer) - confirm the build flags.
 *
 * %ecx and %edx are written by this code (and may be changed by dct64()),
 * so they are listed as clobbers together with the other registers used.
 */
int synth_1to1_pent(real *bandPtr, int channel, short *samples)
{
real tmp[3];
register int retval;
__asm __volatile(
/* eax = bandPtr, esi = samples, edi = clip counter (0), ebp = bo */
" movl %1,%%eax\n\t"/*bandPtr*/
" movl %3,%%esi\n\t"
" xorl %%edi,%%edi\n\t"
" movl bo,%%ebp\n\t"
/* channel != 0 -> .L48 */
" cmpl %%edi,%2\n\t"
" jne .L48\n\t"
/* channel 0: bo = (bo - 1) & 15, work in buffs */
" decl %%ebp\n\t"
" andl $15,%%ebp\n\t"
" movl %%ebp,bo\n\t"
" movl $buffs,%%ecx\n\t"
" jmp .L49\n\t"
/* other channel: skip one short in the output, work in buffs+2176 */
".L48:\n\t"
" addl $2,%%esi\n\t"
" movl $buffs+2176,%%ecx\n\t"
".L49:\n\t"
/* pick the dct64() arguments depending on whether bo is odd or even */
" testl $1,%%ebp\n\t"
" je .L50\n\t"
/* bo odd: ebx = ecx, scratch slot = bo */
" movl %%ecx,%%ebx\n\t"
" movl %%ebp,%4\n\t"
" pushl %%eax\n\t"
/* one push done: 4+%4 is meant to reach the scratch slot again */
" movl 4+%4,%%edx\n\t"
" leal (%%ebx,%%edx,4),%%eax\n\t"
" pushl %%eax\n\t"
/* two pushes done: 8+%4 is meant to reach the scratch slot again */
" movl 8+%4,%%eax\n\t"
" incl %%eax\n\t"
" andl $15,%%eax\n\t"
" leal 1088(,%%eax,4),%%eax\n\t"
" addl %%ebx,%%eax\n\t"
" jmp .L74\n\t"
/* bo even: ebx = ecx + 1088, scratch slot = bo + 1 */
".L50:\n\t"
" leal 1088(%%ecx),%%ebx\n\t"
" leal 1(%%ebp),%%edx\n\t"
" movl %%edx,%4\n\t"
" pushl %%eax\n\t"
" leal 1092(%%ecx,%%ebp,4),%%eax\n\t"
" pushl %%eax\n\t"
" leal (%%ecx,%%ebp,4),%%eax\n\t"
/* push the last argument, call dct64(), drop the three arguments */
".L74:\n\t"
" pushl %%eax\n\t"
" call dct64\n\t"
" addl $12,%%esp\n\t"
/* ecx = decwin + 64 - 4 * scratch slot; ebp = 16 loop iterations */
" movl %4,%%edx\n\t"
" leal 0(,%%edx,4),%%edx\n\t"
" movl $decwin+64,%%eax\n\t"
" movl %%eax,%%ecx\n\t"
" subl %%edx,%%ecx\n\t"
" movl $16,%%ebp\n\t"
/*
 * First loop, 16 outputs: 16 products (ecx)[k] * (ebx)[k] are combined
 * with alternating fsubrp / faddp. The fxch instructions interleave the
 * next multiply with the previous add.
 */
".L55:\n\t"
" flds (%%ecx)\n\t"
" fmuls (%%ebx)\n\t"
" flds 4(%%ecx)\n\t"
" fmuls 4(%%ebx)\n\t"
" fxch %%st(1)\n\t"
" flds 8(%%ecx)\n\t"
" fmuls 8(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 12(%%ecx)\n\t"
" fmuls 12(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 16(%%ecx)\n\t"
" fmuls 16(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 20(%%ecx)\n\t"
" fmuls 20(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 24(%%ecx)\n\t"
" fmuls 24(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 28(%%ecx)\n\t"
" fmuls 28(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 32(%%ecx)\n\t"
" fmuls 32(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 36(%%ecx)\n\t"
" fmuls 36(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 40(%%ecx)\n\t"
" fmuls 40(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 44(%%ecx)\n\t"
" fmuls 44(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 48(%%ecx)\n\t"
" fmuls 48(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 52(%%ecx)\n\t"
" fmuls 52(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 56(%%ecx)\n\t"
" fmuls 56(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds 60(%%ecx)\n\t"
" fmuls 60(%%ebx)\n\t"
" fxch %%st(2)\n\t"
/* reserve a stack slot, convert the result to an integer, pop it to eax */
" subl $4,%%esp\n\t"
" faddp %%st,%%st(1)\n\t"
" fxch %%st(1)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" fistpl (%%esp)\n\t"
" popl %%eax\n\t"
/* store as 16 bit; clamp and count in edi when out of range */
" cmpl $32767,%%eax\n\t"
" jg 1f\n\t"
" cmpl $-32768,%%eax\n\t"
" jl 2f\n\t"
" movw %%ax,(%%esi)\n\t"
" jmp 4f\n\t"
"1: movw $32767,(%%esi)\n\t"
" jmp 3f\n\t"
"2: movw $-32768,(%%esi)\n\t"
"3: incl %%edi\n\t"
"4:\n\t"
/* advance: ebx += 64, ecx += 128, output += 4 bytes */
".L54:\n\t"
" addl $64,%%ebx\n\t"
" subl $-128,%%ecx\n\t"
" addl $4,%%esi\n\t"
" decl %%ebp\n\t"
" jnz .L55\n\t"
/* single middle output: 8 products at byte offsets 0, 8, ..., 56, all added */
" flds (%%ecx)\n\t"
" fmuls (%%ebx)\n\t"
" flds 8(%%ecx)\n\t"
" fmuls 8(%%ebx)\n\t"
" flds 16(%%ecx)\n\t"
" fmuls 16(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 24(%%ecx)\n\t"
" fmuls 24(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 32(%%ecx)\n\t"
" fmuls 32(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 40(%%ecx)\n\t"
" fmuls 40(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 48(%%ecx)\n\t"
" fmuls 48(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" faddp %%st,%%st(1)\n\t"
" flds 56(%%ecx)\n\t"
" fmuls 56(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" subl $4,%%esp\n\t"
" faddp %%st,%%st(1)\n\t"
" fxch %%st(1)\n\t"
" faddp %%st,%%st(1)\n\t"
" fistpl (%%esp)\n\t"
" popl %%eax\n\t"
/* same store / clamp / count sequence as above */
" cmpl $32767,%%eax\n\t"
" jg 1f\n\t"
" cmpl $-32768,%%eax\n\t"
" jl 2f\n\t"
" movw %%ax,(%%esi)\n\t"
" jmp 4f\n\t"
"1: movw $32767,(%%esi)\n\t"
" jmp 3f\n\t"
"2: movw $-32768,(%%esi)\n\t"
"3: incl %%edi\n\t"
"4:\n\t"
/* turn around: ebx -= 64, ecx = ecx - 128 + 8 * scratch slot, 15 iterations */
".L62:\n\t"
" addl $-64,%%ebx\n\t"
" addl $4,%%esi\n\t"
" movl %4,%%edx\n\t"
" leal -128(%%ecx,%%edx,8),%%ecx\n\t"
" movl $15,%%ebp\n\t"
/*
 * Second loop, 15 outputs: the ecx side is read backwards (-4 .. -60,
 * then 0), the first product is negated, and every combine is fsubrp.
 */
".L68:\n\t"
" flds -4(%%ecx)\n\t"
" fchs\n\t"
" fmuls (%%ebx)\n\t"
" flds -8(%%ecx)\n\t"
" fmuls 4(%%ebx)\n\t"
" fxch %%st(1)\n\t"
" flds -12(%%ecx)\n\t"
" fmuls 8(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -16(%%ecx)\n\t"
" fmuls 12(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -20(%%ecx)\n\t"
" fmuls 16(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -24(%%ecx)\n\t"
" fmuls 20(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -28(%%ecx)\n\t"
" fmuls 24(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -32(%%ecx)\n\t"
" fmuls 28(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -36(%%ecx)\n\t"
" fmuls 32(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -40(%%ecx)\n\t"
" fmuls 36(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -44(%%ecx)\n\t"
" fmuls 40(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -48(%%ecx)\n\t"
" fmuls 44(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -52(%%ecx)\n\t"
" fmuls 48(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -56(%%ecx)\n\t"
" fmuls 52(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds -60(%%ecx)\n\t"
" fmuls 56(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" flds (%%ecx)\n\t"
" fmuls 60(%%ebx)\n\t"
" fxch %%st(2)\n\t"
" subl $4,%%esp\n\t"
" fsubrp %%st,%%st(1)\n\t"
" fxch %%st(1)\n\t"
" fsubrp %%st,%%st(1)\n\t"
" fistpl (%%esp)\n\t"
" popl %%eax\n\t"
/* same store / clamp / count sequence as above */
" cmpl $32767,%%eax\n\t"
" jg 1f\n\t"
" cmpl $-32768,%%eax\n\t"
" jl 2f\n\t"
" movw %%ax,(%%esi)\n\t"
" jmp 4f\n\t"
"1: movw $32767,(%%esi)\n\t"
" jmp 3f\n\t"
"2: movw $-32768,(%%esi)\n\t"
"3: incl %%edi\n\t"
"4:\n\t"
/* step back: ebx -= 64, ecx -= 128, output += 4 bytes */
".L67:\n\t"
" addl $-64,%%ebx\n\t"
" addl $-128,%%ecx\n\t"
" addl $4,%%esi\n\t"
" decl %%ebp\n\t"
" jnz .L68\n\t"
/* return value: number of clamped outputs */
" movl %%edi,%%eax\n\t"
:"=a"(retval)
:"m"(bandPtr),"m"(channel),"m"(samples),"m"(tmp[0])
:"memory","%ebp","%edi","%esi","%ebx","%ecx","%edx");
return retval;
}

View File

@ -1,321 +0,0 @@
/
/ mpg123_synth_1to1 works the same way as the c version of this
/ file. only two types of changes have been made:
/ - reordered floating point instructions to
/ prevent pipeline stalls
/ - made WRITE_SAMPLE use integer instead of
/ (slower) floating point
/ all kinds of x86 processors should benefit from these
/ modifications.
/
/ useful sources of information on optimizing x86 code include:
/
/ Intel Architecture Optimization Manual
/ http://www.intel.com/design/pentium/manuals/242816.htm
/
/ Cyrix 6x86 Instruction Set Summary
/ ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
/
/ AMD-K5 Processor Software Development
/ http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
/
/ Stefan Bieschewski <stb@acm.org>
/
/ $Id$
/
/* Uninitialised work area: 4352 bytes, 4-byte aligned. */
.bss
.comm buffs,4352,4
/* bo: position value, initial value 1; kept in 0..15 by the code below. */
.data
.align 4
bo:
.long 1
/*
 * Two 8-byte constants. Read as little-endian IEEE-754 doubles they appear
 * to be 32767.0 and -32768.0. No instruction in this file references them.
 */
.section .rodata
.align 8
.LC0:
.long 0x0,0x40dfffc0
.align 8
.LC1:
.long 0x0,0xc0e00000
.align 8
.text
/*
 * synth_1to1_pent: three stack arguments, result in %eax.
 * After the prologue (12 bytes of scratch + 4 saved registers) the arguments
 * sit at 32(%esp), 36(%esp) and 40(%esp); 16(%esp) is the first scratch slot.
 * Returns the number of outputs that were clamped to -32768 / 32767.
 */
.globl synth_1to1_pent
synth_1to1_pent:
subl $12,%esp
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
/* eax = 1st argument, esi = 3rd argument (output pointer), edi = 0, ebp = bo */
movl 32(%esp),%eax
movl 40(%esp),%esi
xorl %edi,%edi
movl bo,%ebp
/* 2nd argument != 0 -> .L48 */
cmpl %edi,36(%esp)
jne .L48
/* 2nd argument == 0: bo = (bo - 1) & 15, work in buffs */
decl %ebp
andl $15,%ebp
movl %ebp,bo
movl $buffs,%ecx
jmp .L49
/* otherwise: advance the output pointer by 2 bytes, work in buffs+2176 */
.L48:
addl $2,%esi
movl $buffs+2176,%ecx
.L49:
/* choose the dct64 arguments depending on whether bo is odd or even */
testl $1,%ebp
je .L50
/* bo odd: ebx = ecx, scratch slot = bo */
movl %ecx,%ebx
movl %ebp,16(%esp)
pushl %eax
/* 20(%esp) after one push is the same scratch slot as 16(%esp) before it */
movl 20(%esp),%edx
leal (%ebx,%edx,4),%eax
pushl %eax
/* 24(%esp) after two pushes is again the same scratch slot */
movl 24(%esp),%eax
incl %eax
andl $15,%eax
leal 1088(,%eax,4),%eax
addl %ebx,%eax
jmp .L74
/* bo even: ebx = ecx + 1088, scratch slot = bo + 1 */
.L50:
leal 1088(%ecx),%ebx
leal 1(%ebp),%edx
movl %edx,16(%esp)
pushl %eax
leal 1092(%ecx,%ebp,4),%eax
pushl %eax
leal (%ecx,%ebp,4),%eax
/* push the last argument, call dct64, drop the three arguments */
.L74:
pushl %eax
call dct64
addl $12,%esp
/* ecx = decwin + 64 - 4 * scratch slot; ebp = 16 loop iterations */
movl 16(%esp),%edx
leal 0(,%edx,4),%edx
movl $decwin+64,%eax
movl %eax,%ecx
subl %edx,%ecx
movl $16,%ebp
/*
 * First loop, 16 outputs: 16 products (ecx)[k] * (ebx)[k] combined with
 * alternating fsubrp / faddp; fxch interleaves multiplies and adds.
 */
.L55:
flds (%ecx)
fmuls (%ebx)
flds 4(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds 8(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 12(%ecx)
fmuls 12(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 20(%ecx)
fmuls 20(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 28(%ecx)
fmuls 28(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 36(%ecx)
fmuls 36(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 44(%ecx)
fmuls 44(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 52(%ecx)
fmuls 52(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds 60(%ecx)
fmuls 60(%ebx)
fxch %st(2)
/* reserve a stack slot, convert the result to an integer, pop it to eax */
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
fistpl (%esp)
popl %eax
/* store as 16 bit; clamp and count in edi when out of range */
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
/* advance: ebx += 64, ecx += 128, output += 4 bytes */
.L54:
addl $64,%ebx
subl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L55
/* single middle output: 8 products at byte offsets 0, 8, ..., 56, all added */
flds (%ecx)
fmuls (%ebx)
flds 8(%ecx)
fmuls 8(%ebx)
flds 16(%ecx)
fmuls 16(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 24(%ecx)
fmuls 24(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 32(%ecx)
fmuls 32(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 40(%ecx)
fmuls 40(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 48(%ecx)
fmuls 48(%ebx)
fxch %st(2)
faddp %st,%st(1)
flds 56(%ecx)
fmuls 56(%ebx)
fxch %st(2)
subl $4,%esp
faddp %st,%st(1)
fxch %st(1)
faddp %st,%st(1)
fistpl (%esp)
popl %eax
/* same store / clamp / count sequence as above */
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
/* turn around: ebx -= 64, ecx = ecx - 128 + 8 * scratch slot, 15 iterations */
.L62:
addl $-64,%ebx
addl $4,%esi
movl 16(%esp),%edx
leal -128(%ecx,%edx,8),%ecx
movl $15,%ebp
/*
 * Second loop, 15 outputs: the ecx side is read backwards (-4 .. -60,
 * then 0), the first product is negated, and every combine is fsubrp.
 */
.L68:
flds -4(%ecx)
fchs
fmuls (%ebx)
flds -8(%ecx)
fmuls 4(%ebx)
fxch %st(1)
flds -12(%ecx)
fmuls 8(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -16(%ecx)
fmuls 12(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -20(%ecx)
fmuls 16(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -24(%ecx)
fmuls 20(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -28(%ecx)
fmuls 24(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -32(%ecx)
fmuls 28(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -36(%ecx)
fmuls 32(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -40(%ecx)
fmuls 36(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -44(%ecx)
fmuls 40(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -48(%ecx)
fmuls 44(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -52(%ecx)
fmuls 48(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -56(%ecx)
fmuls 52(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds -60(%ecx)
fmuls 56(%ebx)
fxch %st(2)
fsubrp %st,%st(1)
flds (%ecx)
fmuls 60(%ebx)
fxch %st(2)
subl $4,%esp
fsubrp %st,%st(1)
fxch %st(1)
fsubrp %st,%st(1)
fistpl (%esp)
popl %eax
/* same store / clamp / count sequence as above */
cmpl $32767,%eax
jg 1f
cmpl $-32768,%eax
jl 2f
movw %ax,(%esi)
jmp 4f
1: movw $32767,(%esi)
jmp 3f
2: movw $-32768,(%esi)
3: incl %edi
4:
/* step back: ebx -= 64, ecx -= 128, output += 4 bytes */
.L67:
addl $-64,%ebx
addl $-128,%ecx
addl $4,%esi
decl %ebp
jnz .L68
/* return the clamp count, restore the saved registers, release the scratch area */
movl %edi,%eax
popl %ebx
popl %esi
popl %edi
popl %ebp
addl $12,%esp
ret