mirror of
https://github.com/mpv-player/mpv
synced 2025-01-03 13:32:16 +00:00
moved 3dnow and 3dnowex dct36 optimisations into gcc inline assembly
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@10323 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
0865846ecb
commit
d0cf347a62
@ -17,9 +17,9 @@ OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o
|
||||
#SRCS += dct64_sse.s
|
||||
#OBJS += dct64_sse.o
|
||||
#endif
|
||||
SRCS += dct36_3dnow.s dct64_3dnow.c
|
||||
SRCS += dct36_3dnow.c dct64_3dnow.c
|
||||
OBJS += dct36_3dnow.o dct64_3dnow.o
|
||||
SRCS += dct36_k7.s dct64_k7.c
|
||||
SRCS += dct36_k7.c dct64_k7.c
|
||||
OBJS += dct36_k7.o dct64_k7.o
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH_POWERPC),yes)
|
||||
|
497
mp3lib/dct36_3dnow.c
Normal file
497
mp3lib/dct36_3dnow.c
Normal file
@ -0,0 +1,497 @@
|
||||
/*
|
||||
* dct36_3dnow.c - 3DNow! optimized dct36()
|
||||
*
|
||||
* This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
|
||||
* <squash@mb.kcom.ne.jp>, only two types of changes have been made:
|
||||
*
|
||||
* - removed PREFETCH instruction for speedup
|
||||
* - changed function name to support 3DNow! automatic detection
|
||||
*
|
||||
* You can find Kashiyama's original 3dnow! support patch
|
||||
* (for mpg123-0.59o) at
|
||||
* http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
|
||||
*
|
||||
* by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
|
||||
* <kim@comtec.co.jp> - after 1.Apr.1999
|
||||
*
|
||||
* Original disclaimer:
|
||||
* The author of this program disclaim whole expressed or implied
|
||||
* warranties with regard to this program, and in no event shall the
|
||||
* author of this program liable to whatever resulted from the use of
|
||||
* this program. Use it at your own risk.
|
||||
*
|
||||
* 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
|
||||
*/
|
||||
|
||||
#define real float /* ugly - but only way */
|
||||
|
||||
#include "../mangle.h"
|
||||
|
||||
/*
 * dct36_3dnow() / dct36_3dnowex(): 3DNow! implementation of dct36(), written
 * as one GCC inline-assembly statement (AT&T syntax, 32-bit x86).
 * The function is named dct36_3dnowex when __DCT36_OPTIMIZE_FOR_K7 is
 * defined and dct36_3dnow otherwise; the only code difference between the
 * two builds is the PSWAPD-based store sequence marked below.
 *
 * Register assignment (taken from the input constraints at the end):
 *   eax = inbuf, esi = o1, ecx = o2, edx = wintab, ebx = tsbuf
 *
 * inbuf is modified in place by the first two passes. Results are stored
 * through o2 (byte offsets 0..68) and through tsbuf (byte offsets that are
 * multiples of 128, from 0 to 2176). o1 and wintab are only read.
 * COS9 and tfcos36 are tables defined elsewhere and referenced via MANGLE().
 * MM0-MM7 are used as scratch without being listed as clobbers; the block
 * ends with FEMMS.
 */
#ifdef __DCT36_OPTIMIZE_FOR_K7
|
||||
void dct36_3dnowex(real *inbuf, real *o1,
|
||||
real *o2, real *wintab, real *tsbuf)
|
||||
#else
|
||||
void dct36_3dnow(real *inbuf, real *o1,
|
||||
real *o2, real *wintab, real *tsbuf)
|
||||
#endif
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
/* Pass 1: inbuf[i] += inbuf[i-1] for i = 1..17, two floats at a time.
   Each sum uses the neighbour's value from before this pass, because the
   old value is carried over in a register (psrlq/punpckldq). */
"movq (%%eax),%%mm0\n\t"
|
||||
"movq 4(%%eax),%%mm1\n\t"
|
||||
"pfadd %%mm1,%%mm0\n\t"
|
||||
"movq %%mm0,4(%%eax)\n\t"
|
||||
"psrlq $32,%%mm1\n\t"
|
||||
"movq 12(%%eax),%%mm2\n\t"
|
||||
"punpckldq %%mm2,%%mm1\n\t"
|
||||
"pfadd %%mm2,%%mm1\n\t"
|
||||
"movq %%mm1,12(%%eax)\n\t"
|
||||
"psrlq $32,%%mm2\n\t"
|
||||
"movq 20(%%eax),%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm2\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movq %%mm2,20(%%eax)\n\t"
|
||||
"psrlq $32,%%mm3\n\t"
|
||||
"movq 28(%%eax),%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movq %%mm3,28(%%eax)\n\t"
|
||||
"psrlq $32,%%mm4\n\t"
|
||||
"movq 36(%%eax),%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,36(%%eax)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movq 44(%%eax),%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movq %%mm5,44(%%eax)\n\t"
|
||||
"psrlq $32,%%mm6\n\t"
|
||||
"movq 52(%%eax),%%mm7\n\t"
|
||||
"punpckldq %%mm7,%%mm6\n\t"
|
||||
"pfadd %%mm7,%%mm6\n\t"
|
||||
"movq %%mm6,52(%%eax)\n\t"
|
||||
"psrlq $32,%%mm7\n\t"
|
||||
"movq 60(%%eax),%%mm0\n\t"
|
||||
"punpckldq %%mm0,%%mm7\n\t"
|
||||
"pfadd %%mm0,%%mm7\n\t"
|
||||
"movq %%mm7,60(%%eax)\n\t"
|
||||
"psrlq $32,%%mm0\n\t"
|
||||
"movd 68(%%eax),%%mm1\n\t"
|
||||
"pfadd %%mm1,%%mm0\n\t"
|
||||
"movd %%mm0,68(%%eax)\n\t"
|
||||
/* Pass 2: inbuf[i] += inbuf[i-2] for odd i = 3..17, operating on the
   values left by pass 1; again each sum uses the neighbour's value from
   before this pass. */
"movd 4(%%eax),%%mm0\n\t"
|
||||
"movd 12(%%eax),%%mm1\n\t"
|
||||
"punpckldq %%mm1,%%mm0\n\t"
|
||||
"punpckldq 20(%%eax),%%mm1\n\t"
|
||||
"pfadd %%mm1,%%mm0\n\t"
|
||||
"movd %%mm0,12(%%eax)\n\t"
|
||||
"psrlq $32,%%mm0\n\t"
|
||||
"movd %%mm0,20(%%eax)\n\t"
|
||||
"psrlq $32,%%mm1\n\t"
|
||||
"movd 28(%%eax),%%mm2\n\t"
|
||||
"punpckldq %%mm2,%%mm1\n\t"
|
||||
"punpckldq 36(%%eax),%%mm2\n\t"
|
||||
"pfadd %%mm2,%%mm1\n\t"
|
||||
"movd %%mm1,28(%%eax)\n\t"
|
||||
"psrlq $32,%%mm1\n\t"
|
||||
"movd %%mm1,36(%%eax)\n\t"
|
||||
"psrlq $32,%%mm2\n\t"
|
||||
"movd 44(%%eax),%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm2\n\t"
|
||||
"punpckldq 52(%%eax),%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movd %%mm2,44(%%eax)\n\t"
|
||||
"psrlq $32,%%mm2\n\t"
|
||||
"movd %%mm2,52(%%eax)\n\t"
|
||||
"psrlq $32,%%mm3\n\t"
|
||||
"movd 60(%%eax),%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm3\n\t"
|
||||
"punpckldq 68(%%eax),%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movd %%mm3,60(%%eax)\n\t"
|
||||
"psrlq $32,%%mm3\n\t"
|
||||
"movd %%mm3,68(%%eax)\n\t"
|
||||
|
||||
/* mm0 = inbuf[6..7] * COS9[3], mm1 = inbuf[12..13] * COS9[6].
   Neither register is written again; both are reused by later groups. */
"movq 24(%%eax),%%mm0\n\t"
|
||||
"movq 48(%%eax),%%mm1\n\t"
|
||||
"movd "MANGLE(COS9)"+12,%%mm2\n\t"
|
||||
"punpckldq %%mm2,%%mm2\n\t"
|
||||
"movd "MANGLE(COS9)"+24,%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm3\n\t"
|
||||
"pfmul %%mm2,%%mm0\n\t"
|
||||
"pfmul %%mm3,%%mm1\n\t"
|
||||
/* Build mm7 = { low: 1.0f, high: 0.0f } from the integer 1; eax (inbuf)
   is saved and restored around it. mm7 is later copied to mm5 and its
   high half replaced with a tfcos36[] entry. */
"pushl %%eax\n\t"
|
||||
"movl $1,%%eax\n\t"
|
||||
"movd %%eax,%%mm7\n\t"
|
||||
"pi2fd %%mm7,%%mm7\n\t"
|
||||
"popl %%eax\n\t"
|
||||
/* Group scaled by tfcos36[0] and tfcos36[8]. */
"movq 8(%%eax),%%mm2\n\t"
|
||||
"movd "MANGLE(COS9)"+4,%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm3\n\t"
|
||||
"pfmul %%mm3,%%mm2\n\t"
|
||||
"pfadd %%mm0,%%mm2\n\t"
|
||||
"movq 40(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+20,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movq 56(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+28,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movq (%%eax),%%mm3\n\t"
|
||||
"movq 16(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+8,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movq 32(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+16,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm1,%%mm3\n\t"
|
||||
"movq 64(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+32,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movq %%mm2,%%mm4\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 108(%%edx),%%mm6\n\t"
|
||||
"punpckldq 104(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
/* K7 build: swap the two halves with PSWAPD and store both with a single
   movq at o2+32; the plain 3DNow! build stores the low half at o2+36 and
   the high half at o2+32 with two movd instructions. */
#ifdef __DCT36_OPTIMIZE_FOR_K7
|
||||
"pswapd %%mm5,%%mm5\n\t"
|
||||
"movq %%mm5,32(%%ecx)\n\t"
|
||||
#else
|
||||
"movd %%mm5,36(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,32(%%ecx)\n\t"
|
||||
#endif
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 32(%%edx),%%mm6\n\t"
|
||||
"punpckldq 36(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 32(%%esi),%%mm6\n\t"
|
||||
"punpckldq 36(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,1024(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1152(%%ebx)\n\t"
|
||||
"movq %%mm3,%%mm4\n\t"
|
||||
"pfsub %%mm2,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 140(%%edx),%%mm6\n\t"
|
||||
"punpckldq 72(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,68(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,0(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 0(%%edx),%%mm6\n\t"
|
||||
"punpckldq 68(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 0(%%esi),%%mm6\n\t"
|
||||
"punpckldq 68(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,0(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,2176(%%ebx)\n\t"
|
||||
/* Group scaled by tfcos36[1] and tfcos36[7]. */
"movq 8(%%eax),%%mm2\n\t"
|
||||
"movq 40(%%eax),%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm2\n\t"
|
||||
"movq 56(%%eax),%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm2\n\t"
|
||||
"movd "MANGLE(COS9)"+12,%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm3\n\t"
|
||||
"pfmul %%mm3,%%mm2\n\t"
|
||||
"movq 16(%%eax),%%mm3\n\t"
|
||||
"movq 32(%%eax),%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movq 64(%%eax),%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+24,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"movq 48(%%eax),%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movq (%%eax),%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movq %%mm2,%%mm4\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 112(%%edx),%%mm6\n\t"
|
||||
"punpckldq 100(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,40(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,28(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 28(%%edx),%%mm6\n\t"
|
||||
"punpckldq 40(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 28(%%esi),%%mm6\n\t"
|
||||
"punpckldq 40(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,896(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1280(%%ebx)\n\t"
|
||||
"movq %%mm3,%%mm4\n\t"
|
||||
"pfsub %%mm2,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 136(%%edx),%%mm6\n\t"
|
||||
"punpckldq 76(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,64(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,4(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 4(%%edx),%%mm6\n\t"
|
||||
"punpckldq 64(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 4(%%esi),%%mm6\n\t"
|
||||
"punpckldq 64(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,128(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,2048(%%ebx)\n\t"
|
||||
|
||||
/* Group scaled by tfcos36[2] and tfcos36[6]; reuses mm0 and mm1. */
"movq 8(%%eax),%%mm2\n\t"
|
||||
"movd "MANGLE(COS9)"+20,%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm3\n\t"
|
||||
"pfmul %%mm3,%%mm2\n\t"
|
||||
"pfsub %%mm0,%%mm2\n\t"
|
||||
"movq 40(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+28,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm2\n\t"
|
||||
"movq 56(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+4,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movq (%%eax),%%mm3\n\t"
|
||||
"movq 16(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+32,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movq 32(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+8,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm1,%%mm3\n\t"
|
||||
"movq 64(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+16,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"movq %%mm2,%%mm4\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 116(%%edx),%%mm6\n\t"
|
||||
"punpckldq 96(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,44(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,24(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 24(%%edx),%%mm6\n\t"
|
||||
"punpckldq 44(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 24(%%esi),%%mm6\n\t"
|
||||
"punpckldq 44(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,768(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1408(%%ebx)\n\t"
|
||||
"movq %%mm3,%%mm4\n\t"
|
||||
"pfsub %%mm2,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 132(%%edx),%%mm6\n\t"
|
||||
"punpckldq 80(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,60(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,8(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 8(%%edx),%%mm6\n\t"
|
||||
"punpckldq 60(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 8(%%esi),%%mm6\n\t"
|
||||
"punpckldq 60(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,256(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1920(%%ebx)\n\t"
|
||||
/* Group scaled by tfcos36[3] and tfcos36[5]; reuses mm0 and mm1. */
"movq 8(%%eax),%%mm2\n\t"
|
||||
"movd "MANGLE(COS9)"+28,%%mm3\n\t"
|
||||
"punpckldq %%mm3,%%mm3\n\t"
|
||||
"pfmul %%mm3,%%mm2\n\t"
|
||||
"pfsub %%mm0,%%mm2\n\t"
|
||||
"movq 40(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+4,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm2\n\t"
|
||||
"movq 56(%%eax),%%mm3\n\t"
|
||||
"movd "MANGLE(COS9)"+20,%%mm4\n\t"
|
||||
"punpckldq %%mm4,%%mm4\n\t"
|
||||
"pfmul %%mm4,%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm2\n\t"
|
||||
"movq (%%eax),%%mm3\n\t"
|
||||
"movq 16(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+16,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movq 32(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+32,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfadd %%mm4,%%mm3\n\t"
|
||||
"pfadd %%mm1,%%mm3\n\t"
|
||||
"movq 64(%%eax),%%mm4\n\t"
|
||||
"movd "MANGLE(COS9)"+8,%%mm5\n\t"
|
||||
"punpckldq %%mm5,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"pfsub %%mm4,%%mm3\n\t"
|
||||
"movq %%mm2,%%mm4\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 120(%%edx),%%mm6\n\t"
|
||||
"punpckldq 92(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,48(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,20(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 20(%%edx),%%mm6\n\t"
|
||||
"punpckldq 48(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 20(%%esi),%%mm6\n\t"
|
||||
"punpckldq 48(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,640(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1536(%%ebx)\n\t"
|
||||
"movq %%mm3,%%mm4\n\t"
|
||||
"pfsub %%mm2,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 128(%%edx),%%mm6\n\t"
|
||||
"punpckldq 84(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,56(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,12(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 12(%%edx),%%mm6\n\t"
|
||||
"punpckldq 56(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 12(%%esi),%%mm6\n\t"
|
||||
"punpckldq 56(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,384(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1792(%%ebx)\n\t"
|
||||
|
||||
/* Final group, scaled by tfcos36[4]: alternating-sign sum of the
   inbuf pairs at byte offsets 0, 16, 32, 48 and 64. */
"movq (%%eax),%%mm4\n\t"
|
||||
"movq 16(%%eax),%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm4\n\t"
|
||||
"movq 32(%%eax),%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq 48(%%eax),%%mm3\n\t"
|
||||
"pfsub %%mm3,%%mm4\n\t"
|
||||
"movq 64(%%eax),%%mm3\n\t"
|
||||
"pfadd %%mm3,%%mm4\n\t"
|
||||
"movq %%mm7,%%mm5\n\t"
|
||||
"punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t"
|
||||
"pfmul %%mm5,%%mm4\n\t"
|
||||
"movq %%mm4,%%mm5\n\t"
|
||||
"pfacc %%mm5,%%mm5\n\t"
|
||||
"movd 124(%%edx),%%mm6\n\t"
|
||||
"punpckldq 88(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,52(%%ecx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,16(%%ecx)\n\t"
|
||||
"movq %%mm4,%%mm6\n\t"
|
||||
"punpckldq %%mm6,%%mm5\n\t"
|
||||
"pfsub %%mm6,%%mm5\n\t"
|
||||
"punpckhdq %%mm5,%%mm5\n\t"
|
||||
"movd 16(%%edx),%%mm6\n\t"
|
||||
"punpckldq 52(%%edx),%%mm6\n\t"
|
||||
"pfmul %%mm6,%%mm5\n\t"
|
||||
"movd 16(%%esi),%%mm6\n\t"
|
||||
"punpckldq 52(%%esi),%%mm6\n\t"
|
||||
"pfadd %%mm6,%%mm5\n\t"
|
||||
"movd %%mm5,512(%%ebx)\n\t"
|
||||
"psrlq $32,%%mm5\n\t"
|
||||
"movd %%mm5,1664(%%ebx)\n\t"
|
||||
|
||||
/* Leave MMX/3DNow! state so that x87 code can run again. */
"femms\n\t"
|
||||
/* No outputs; the five pointers are inputs pinned to fixed registers.
   The "memory" clobber covers the loads and stores made through them. */
:
|
||||
: "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "b" (tsbuf)
|
||||
: "memory");
|
||||
}
|
@ -1,499 +0,0 @@
|
||||
/
|
||||
/ dct36_3dnow.s - 3DNow! optimized dct36()
|
||||
/
|
||||
/ This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
|
||||
/ <squash@mb.kcom.ne.jp>,only two types of changes have been made:
|
||||
/
|
||||
/ - remove PREFETCH instruction for speedup
|
||||
/ - change function name for support 3DNow! automatic detect
|
||||
/
|
||||
/ You can find Kashiyama's original 3dnow! support patch
|
||||
/ (for mpg123-0.59o) at
|
||||
/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
|
||||
/
|
||||
/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
|
||||
/ <kim@comtec.co.jp> - after 1.Apr.1999
|
||||
/
|
||||
|
||||
///
|
||||
/// Replacement of dct36() with AMD's 3DNow! SIMD operations support
|
||||
///
|
||||
/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
|
||||
///
|
||||
/// The author of this program disclaim whole expressed or implied
|
||||
/// warranties with regard to this program, and in no event shall the
|
||||
/// author of this program liable to whatever resulted from the use of
|
||||
/// this program. Use it at your own risk.
|
||||
///
|
||||
|
||||
.globl dct36_3dnow
|
||||
.type dct36_3dnow,@function
|
||||
dct36_3dnow:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $120,%esp
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 8(%ebp),%eax
|
||||
movl 12(%ebp),%esi
|
||||
movl 16(%ebp),%ecx
|
||||
movl 20(%ebp),%edx
|
||||
movl 24(%ebp),%ebx
|
||||
leal -128(%ebp),%esp
|
||||
|
||||
femms
|
||||
movq (%eax),%mm0
|
||||
movq 4(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movq %mm0,4(%eax)
|
||||
psrlq $32,%mm1
|
||||
movq 12(%eax),%mm2
|
||||
punpckldq %mm2,%mm1
|
||||
pfadd %mm2,%mm1
|
||||
movq %mm1,12(%eax)
|
||||
psrlq $32,%mm2
|
||||
movq 20(%eax),%mm3
|
||||
punpckldq %mm3,%mm2
|
||||
pfadd %mm3,%mm2
|
||||
movq %mm2,20(%eax)
|
||||
psrlq $32,%mm3
|
||||
movq 28(%eax),%mm4
|
||||
punpckldq %mm4,%mm3
|
||||
pfadd %mm4,%mm3
|
||||
movq %mm3,28(%eax)
|
||||
psrlq $32,%mm4
|
||||
movq 36(%eax),%mm5
|
||||
punpckldq %mm5,%mm4
|
||||
pfadd %mm5,%mm4
|
||||
movq %mm4,36(%eax)
|
||||
psrlq $32,%mm5
|
||||
movq 44(%eax),%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfadd %mm6,%mm5
|
||||
movq %mm5,44(%eax)
|
||||
psrlq $32,%mm6
|
||||
movq 52(%eax),%mm7
|
||||
punpckldq %mm7,%mm6
|
||||
pfadd %mm7,%mm6
|
||||
movq %mm6,52(%eax)
|
||||
psrlq $32,%mm7
|
||||
movq 60(%eax),%mm0
|
||||
punpckldq %mm0,%mm7
|
||||
pfadd %mm0,%mm7
|
||||
movq %mm7,60(%eax)
|
||||
psrlq $32,%mm0
|
||||
movd 68(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movd %mm0,68(%eax)
|
||||
movd 4(%eax),%mm0
|
||||
movd 12(%eax),%mm1
|
||||
punpckldq %mm1,%mm0
|
||||
punpckldq 20(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movd %mm0,12(%eax)
|
||||
psrlq $32,%mm0
|
||||
movd %mm0,20(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd 28(%eax),%mm2
|
||||
punpckldq %mm2,%mm1
|
||||
punpckldq 36(%eax),%mm2
|
||||
pfadd %mm2,%mm1
|
||||
movd %mm1,28(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd %mm1,36(%eax)
|
||||
psrlq $32,%mm2
|
||||
movd 44(%eax),%mm3
|
||||
punpckldq %mm3,%mm2
|
||||
punpckldq 52(%eax),%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movd %mm2,44(%eax)
|
||||
psrlq $32,%mm2
|
||||
movd %mm2,52(%eax)
|
||||
psrlq $32,%mm3
|
||||
movd 60(%eax),%mm4
|
||||
punpckldq %mm4,%mm3
|
||||
punpckldq 68(%eax),%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movd %mm3,60(%eax)
|
||||
psrlq $32,%mm3
|
||||
movd %mm3,68(%eax)
|
||||
|
||||
movq 24(%eax),%mm0
|
||||
movq 48(%eax),%mm1
|
||||
movd COS9+12,%mm2
|
||||
punpckldq %mm2,%mm2
|
||||
movd COS9+24,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm2,%mm0
|
||||
pfmul %mm3,%mm1
|
||||
pushl %eax
|
||||
movl $1,%eax
|
||||
movd %eax,%mm7
|
||||
pi2fd %mm7,%mm7
|
||||
popl %eax
|
||||
movq 8(%eax),%mm2
|
||||
movd COS9+4,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfadd %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+20,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+28,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+0,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 108(%edx),%mm6
|
||||
punpckldq 104(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,36(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,32(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 32(%edx),%mm6
|
||||
punpckldq 36(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 32(%esi),%mm6
|
||||
punpckldq 36(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,1024(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1152(%ebx)
|
||||
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+32,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 140(%edx),%mm6
|
||||
punpckldq 72(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,68(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,0(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 0(%edx),%mm6
|
||||
punpckldq 68(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 0(%esi),%mm6
|
||||
punpckldq 68(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,0(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,2176(%ebx)
|
||||
movq 8(%eax),%mm2
|
||||
movq 40(%eax),%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movd COS9+12,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
movq 16(%eax),%mm3
|
||||
movq 32(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movd COS9+24,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
movq 48(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq (%eax),%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+4,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 112(%edx),%mm6
|
||||
punpckldq 100(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,40(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,28(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 28(%edx),%mm6
|
||||
punpckldq 40(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 28(%esi),%mm6
|
||||
punpckldq 40(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,896(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1280(%ebx)
|
||||
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+28,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 136(%edx),%mm6
|
||||
punpckldq 76(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,64(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,4(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 4(%edx),%mm6
|
||||
punpckldq 64(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 4(%esi),%mm6
|
||||
punpckldq 64(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,128(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,2048(%ebx)
|
||||
|
||||
movq 8(%eax),%mm2
|
||||
movd COS9+20,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfsub %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+28,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+4,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+8,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 116(%edx),%mm6
|
||||
punpckldq 96(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,44(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,24(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 24(%edx),%mm6
|
||||
punpckldq 44(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 24(%esi),%mm6
|
||||
punpckldq 44(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,768(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1408(%ebx)
|
||||
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+24,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 132(%edx),%mm6
|
||||
punpckldq 80(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,60(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,8(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 8(%edx),%mm6
|
||||
punpckldq 60(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 8(%esi),%mm6
|
||||
punpckldq 60(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,256(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1920(%ebx)
|
||||
movq 8(%eax),%mm2
|
||||
movd COS9+28,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfsub %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+4,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+20,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+12,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 120(%edx),%mm6
|
||||
punpckldq 92(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,48(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,20(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 20(%edx),%mm6
|
||||
punpckldq 48(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 20(%esi),%mm6
|
||||
punpckldq 48(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,640(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1536(%ebx)
|
||||
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+20,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 128(%edx),%mm6
|
||||
punpckldq 84(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,56(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,12(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 12(%edx),%mm6
|
||||
punpckldq 56(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 12(%esi),%mm6
|
||||
punpckldq 56(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,384(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1792(%ebx)
|
||||
|
||||
movq (%eax),%mm4
|
||||
movq 16(%eax),%mm3
|
||||
pfsub %mm3,%mm4
|
||||
movq 32(%eax),%mm3
|
||||
pfadd %mm3,%mm4
|
||||
movq 48(%eax),%mm3
|
||||
pfsub %mm3,%mm4
|
||||
movq 64(%eax),%mm3
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+16,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 124(%edx),%mm6
|
||||
punpckldq 88(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,52(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,16(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 16(%edx),%mm6
|
||||
punpckldq 52(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 16(%esi),%mm6
|
||||
punpckldq 52(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,512(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1664(%ebx)
|
||||
|
||||
femms
|
||||
popl %ebx
|
||||
popl %esi
|
||||
movl %ebp,%esp
|
||||
popl %ebp
|
||||
ret
|
34
mp3lib/dct36_k7.c
Normal file
34
mp3lib/dct36_k7.c
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* dct36_k7.c - 3DNowEx(DSP)! optimized dct36()
|
||||
*
|
||||
* This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
|
||||
* <squash@mb.kcom.ne.jp>, only the following changes have been made:
|
||||
*
|
||||
* - added new opcode PSWAPD
|
||||
* - removed PREFETCH instruction for speedup
|
||||
* - changed function name for support 3DNowEx! automatic detection
|
||||
*
|
||||
* note: because K7 processors are aggressive out-of-order three-way
|
||||
* superscalar ones, instruction order is not significant for them.
|
||||
*
|
||||
* You can find Kashiyama's original 3dnow! support patch
|
||||
* (for mpg123-0.59o) at
|
||||
* http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
|
||||
*
|
||||
* by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
|
||||
* <kim@comtec.co.jp> - after 1.Apr.1999
|
||||
*
|
||||
* Original disclaimer:
|
||||
* The author of this program disclaim whole expressed or implied
|
||||
* warranties with regard to this program, and in no event shall the
|
||||
* author of this program liable to whatever resulted from the use of
|
||||
* this program. Use it at your own risk.
|
||||
*
|
||||
* Modified by Nick Kurshev <nickols_k@mail.ru>
|
||||
*
|
||||
* 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
|
||||
*/
|
||||
|
||||
#define __DCT36_OPTIMIZE_FOR_K7
|
||||
|
||||
#include "dct36_3dnow.c"
|
@ -1,511 +0,0 @@
|
||||
///
|
||||
/// Replacement of dct36() with AMD's 3DNowEx(DSP)! SIMD operations support
|
||||
///
|
||||
/// This code based 'dct36_3dnow.s' by Syuuhei Kashiyama
|
||||
/// <squash@mb.kcom.ne.jp>,only some types of changes have been made:
|
||||
///
|
||||
/// - added new opcode PSWAPD
|
||||
/// - changed function name to support 3DNowEx! automatic detection
|
||||
///
|
||||
/// note: because K7 processors are aggressive out-of-order three-way
|
||||
/// superscalar designs, instruction order is not significant for them.
|
||||
///
|
||||
/// Modified by Nick Kurshev <nickols_k@mail.ru>
|
||||
///
|
||||
/
|
||||
/ dct36_3dnow.s - 3DNow! optimized dct36()
|
||||
/
|
||||
/ This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
|
||||
/ <squash@mb.kcom.ne.jp>, only two types of changes have been made:
|
||||
/
|
||||
/ - removed PREFETCH instruction for speedup
|
||||
/ - changed function name to support 3DNow! automatic detection
|
||||
/
|
||||
/ You can find Kashiyama's original 3dnow! support patch
|
||||
/ (for mpg123-0.59o) at
|
||||
/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
|
||||
/
|
||||
/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
|
||||
/ <kim@comtec.co.jp> - after 1.Apr.1999
|
||||
/
|
||||
|
||||
///
|
||||
/// Replacement of dct36() with AMD's 3DNow! SIMD operations support
|
||||
///
|
||||
/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
|
||||
///
|
||||
/// The author of this program disclaim whole expressed or implied
|
||||
/// warranties with regard to this program, and in no event shall the
|
||||
/// author of this program liable to whatever resulted from the use of
|
||||
/// this program. Use it at your own risk.
|
||||
///
|
||||
|
||||
///
/// dct36_3dnowex: replacement of dct36() written with 3DNow! instructions
/// plus a single extended-3DNow! pswapd.
///
/// Frame: pushl %ebp / movl %esp,%ebp with five 32-bit stack arguments, each
/// used as the base address of packed single-precision floats:
///   8(%ebp)  -> %eax  18 floats (bytes 0..71), updated in place, then read
///   12(%ebp) -> %esi  18 floats (bytes 0..71), only read
///   16(%ebp) -> %ecx  18 floats (bytes 0..71), only written
///   20(%ebp) -> %edx  36 floats (bytes 0..143), only read
///   24(%ebp) -> %ebx  18 floats written 128 bytes apart (bytes 0..2179)
/// Also reads the symbols COS9 and tfcos36, which are not defined in this block.
/// %esi and %ebx are saved and restored; %mm0-%mm7 are overwritten and
/// femms is executed on entry and again before returning.
/// NOTE(review): the argument roles presumably match mpg123's
/// dct36(inbuf, o1, o2, wintab, tsbuf) - confirm against the C caller.
///
.globl dct36_3dnowex
|
||||
.type dct36_3dnowex,@function
|
||||
dct36_3dnowex:
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
/// Reserve 120 bytes below the frame; no instruction below addresses this area.
subl $120,%esp
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
movl 8(%ebp),%eax
|
||||
movl 12(%ebp),%esi
|
||||
movl 16(%ebp),%ecx
|
||||
movl 20(%ebp),%edx
|
||||
movl 24(%ebp),%ebx
|
||||
/// %esp already equals %ebp-128 here (120 reserved + two 4-byte pushes),
/// so this leal leaves it unchanged.
leal -128(%ebp),%esp
|
||||
|
||||
femms
|
||||
/// Pass 1 over the %eax array: in[i] += in[i-1] for i = 1..17.
/// Each pair is loaded before the overlapping store, so every sum uses the
/// values the array held on entry.
movq (%eax),%mm0
|
||||
movq 4(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movq %mm0,4(%eax)
|
||||
psrlq $32,%mm1
|
||||
movq 12(%eax),%mm2
|
||||
punpckldq %mm2,%mm1
|
||||
pfadd %mm2,%mm1
|
||||
movq %mm1,12(%eax)
|
||||
psrlq $32,%mm2
|
||||
movq 20(%eax),%mm3
|
||||
punpckldq %mm3,%mm2
|
||||
pfadd %mm3,%mm2
|
||||
movq %mm2,20(%eax)
|
||||
psrlq $32,%mm3
|
||||
movq 28(%eax),%mm4
|
||||
punpckldq %mm4,%mm3
|
||||
pfadd %mm4,%mm3
|
||||
movq %mm3,28(%eax)
|
||||
psrlq $32,%mm4
|
||||
movq 36(%eax),%mm5
|
||||
punpckldq %mm5,%mm4
|
||||
pfadd %mm5,%mm4
|
||||
movq %mm4,36(%eax)
|
||||
psrlq $32,%mm5
|
||||
movq 44(%eax),%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfadd %mm6,%mm5
|
||||
movq %mm5,44(%eax)
|
||||
psrlq $32,%mm6
|
||||
movq 52(%eax),%mm7
|
||||
punpckldq %mm7,%mm6
|
||||
pfadd %mm7,%mm6
|
||||
movq %mm6,52(%eax)
|
||||
psrlq $32,%mm7
|
||||
movq 60(%eax),%mm0
|
||||
punpckldq %mm0,%mm7
|
||||
pfadd %mm0,%mm7
|
||||
movq %mm7,60(%eax)
|
||||
psrlq $32,%mm0
|
||||
movd 68(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movd %mm0,68(%eax)
|
||||
/// Pass 2 over the odd-indexed entries: in[i] += in[i-2] for i = 3,5,...,17,
/// using the values left by pass 1 (each operand is loaded before it is
/// overwritten).
movd 4(%eax),%mm0
|
||||
movd 12(%eax),%mm1
|
||||
punpckldq %mm1,%mm0
|
||||
punpckldq 20(%eax),%mm1
|
||||
pfadd %mm1,%mm0
|
||||
movd %mm0,12(%eax)
|
||||
psrlq $32,%mm0
|
||||
movd %mm0,20(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd 28(%eax),%mm2
|
||||
punpckldq %mm2,%mm1
|
||||
punpckldq 36(%eax),%mm2
|
||||
pfadd %mm2,%mm1
|
||||
movd %mm1,28(%eax)
|
||||
psrlq $32,%mm1
|
||||
movd %mm1,36(%eax)
|
||||
psrlq $32,%mm2
|
||||
movd 44(%eax),%mm3
|
||||
punpckldq %mm3,%mm2
|
||||
punpckldq 52(%eax),%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movd %mm2,44(%eax)
|
||||
psrlq $32,%mm2
|
||||
movd %mm2,52(%eax)
|
||||
psrlq $32,%mm3
|
||||
movd 60(%eax),%mm4
|
||||
punpckldq %mm4,%mm3
|
||||
punpckldq 68(%eax),%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movd %mm3,60(%eax)
|
||||
psrlq $32,%mm3
|
||||
movd %mm3,68(%eax)
|
||||
/// Loop-invariant products, kept live until the end of the function:
///   mm0 = pair at 24(%eax) * float at COS9+12 (broadcast to both halves)
///   mm1 = pair at 48(%eax) * float at COS9+24 (broadcast to both halves)
movq 24(%eax),%mm0
|
||||
movq 48(%eax),%mm1
|
||||
movd COS9+12,%mm2
|
||||
punpckldq %mm2,%mm2
|
||||
movd COS9+24,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm2,%mm0
|
||||
pfmul %mm3,%mm1
|
||||
/// mm7 = integer 1 converted to float by pi2fd (low half 1.0, high half 0.0);
/// %eax is borrowed for the constant and restored from the stack.
pushl %eax
|
||||
movl $1,%eax
|
||||
movd %eax,%mm7
|
||||
pi2fd %mm7,%mm7
|
||||
popl %eax
|
||||
/// Section for tfcos36+0 / tfcos36+32.
///   mm2 = 8(%eax)*COS9+4 + mm0 + 40(%eax)*COS9+20 + 56(%eax)*COS9+28
///   mm3 = (%eax) + 16(%eax)*COS9+8 + 32(%eax)*COS9+16 + mm1 + 64(%eax)*COS9+32
/// (every memory operand is a pair of floats, every COS9 value is broadcast).
movq 8(%eax),%mm2
|
||||
movd COS9+4,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfadd %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+20,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+28,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
/// Output pattern shared by all sections below:
///   mm4 = (mm2 + mm3) * (1.0, tfcos36+k)      -> halves (a, b)
///   (a+b) * two %edx floats                   -> two floats in the %ecx array
///   (a-b) * two %edx floats + two %esi floats -> two floats in the %ebx array
/// Here k = 0: writes 32/36(%ecx) and 1024/1152(%ebx).
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+0,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 108(%edx),%mm6
|
||||
punpckldq 104(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
/// The only pswapd in this function: swap the halves so that one movq stores
/// both results (the other sections use movd / psrlq / movd instead).
pswapd %mm5, %mm5
|
||||
movq %mm5, 32(%ecx)
|
||||
/// Build (a-b) in both halves: the low half of the pfsub result is discarded
/// by the punpckhdq that follows.
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 32(%edx),%mm6
|
||||
punpckldq 36(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 32(%esi),%mm6
|
||||
punpckldq 36(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,1024(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1152(%ebx)
|
||||
/// Same pattern with mm4 = (mm3 - mm2) * (1.0, tfcos36+32):
/// writes 68/0(%ecx) and 0/2176(%ebx).
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+32,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 140(%edx),%mm6
|
||||
punpckldq 72(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,68(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,0(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 0(%edx),%mm6
|
||||
punpckldq 68(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 0(%esi),%mm6
|
||||
punpckldq 68(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,0(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,2176(%ebx)
|
||||
/// Section for tfcos36+4 / tfcos36+28.
///   mm2 = (8(%eax) - 40(%eax) - 56(%eax)) * COS9+12
///   mm3 = (16(%eax) - 32(%eax) - 64(%eax)) * COS9+24 - 48(%eax) + (%eax)
movq 8(%eax),%mm2
|
||||
movq 40(%eax),%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movd COS9+12,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
movq 16(%eax),%mm3
|
||||
movq 32(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movd COS9+24,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
movq 48(%eax),%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq (%eax),%mm4
|
||||
pfadd %mm4,%mm3
|
||||
/// (mm2 + mm3) with tfcos36+4: writes 40/28(%ecx) and 896/1280(%ebx).
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+4,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 112(%edx),%mm6
|
||||
punpckldq 100(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,40(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,28(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 28(%edx),%mm6
|
||||
punpckldq 40(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 28(%esi),%mm6
|
||||
punpckldq 40(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,896(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1280(%ebx)
|
||||
/// (mm3 - mm2) with tfcos36+28: writes 64/4(%ecx) and 128/2048(%ebx).
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+28,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 136(%edx),%mm6
|
||||
punpckldq 76(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,64(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,4(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 4(%edx),%mm6
|
||||
punpckldq 64(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 4(%esi),%mm6
|
||||
punpckldq 64(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,128(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,2048(%ebx)
|
||||
|
||||
/// Section for tfcos36+8 / tfcos36+24.
///   mm2 = 8(%eax)*COS9+20 - mm0 - 40(%eax)*COS9+28 + 56(%eax)*COS9+4
///   mm3 = (%eax) - 16(%eax)*COS9+32 - 32(%eax)*COS9+8 + mm1 + 64(%eax)*COS9+16
movq 8(%eax),%mm2
|
||||
movd COS9+20,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfsub %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+28,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+4,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
/// (mm2 + mm3) with tfcos36+8: writes 44/24(%ecx) and 768/1408(%ebx).
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+8,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 116(%edx),%mm6
|
||||
punpckldq 96(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,44(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,24(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 24(%edx),%mm6
|
||||
punpckldq 44(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 24(%esi),%mm6
|
||||
punpckldq 44(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,768(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1408(%ebx)
|
||||
/// (mm3 - mm2) with tfcos36+24: writes 60/8(%ecx) and 256/1920(%ebx).
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+24,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 132(%edx),%mm6
|
||||
punpckldq 80(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,60(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,8(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 8(%edx),%mm6
|
||||
punpckldq 60(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 8(%esi),%mm6
|
||||
punpckldq 60(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,256(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1920(%ebx)
|
||||
/// Section for tfcos36+12 / tfcos36+20.
///   mm2 = 8(%eax)*COS9+28 - mm0 + 40(%eax)*COS9+4 - 56(%eax)*COS9+20
///   mm3 = (%eax) - 16(%eax)*COS9+16 + 32(%eax)*COS9+32 + mm1 - 64(%eax)*COS9+8
movq 8(%eax),%mm2
|
||||
movd COS9+28,%mm3
|
||||
punpckldq %mm3,%mm3
|
||||
pfmul %mm3,%mm2
|
||||
pfsub %mm0,%mm2
|
||||
movq 40(%eax),%mm3
|
||||
movd COS9+4,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfadd %mm3,%mm2
|
||||
movq 56(%eax),%mm3
|
||||
movd COS9+20,%mm4
|
||||
punpckldq %mm4,%mm4
|
||||
pfmul %mm4,%mm3
|
||||
pfsub %mm3,%mm2
|
||||
movq (%eax),%mm3
|
||||
movq 16(%eax),%mm4
|
||||
movd COS9+16,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
movq 32(%eax),%mm4
|
||||
movd COS9+32,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfadd %mm4,%mm3
|
||||
pfadd %mm1,%mm3
|
||||
movq 64(%eax),%mm4
|
||||
movd COS9+8,%mm5
|
||||
punpckldq %mm5,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
pfsub %mm4,%mm3
|
||||
/// (mm2 + mm3) with tfcos36+12: writes 48/20(%ecx) and 640/1536(%ebx).
movq %mm2,%mm4
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+12,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 120(%edx),%mm6
|
||||
punpckldq 92(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,48(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,20(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 20(%edx),%mm6
|
||||
punpckldq 48(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 20(%esi),%mm6
|
||||
punpckldq 48(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,640(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1536(%ebx)
|
||||
/// (mm3 - mm2) with tfcos36+20: writes 56/12(%ecx) and 384/1792(%ebx).
movq %mm3,%mm4
|
||||
pfsub %mm2,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+20,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 128(%edx),%mm6
|
||||
punpckldq 84(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,56(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,12(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 12(%edx),%mm6
|
||||
punpckldq 56(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 12(%esi),%mm6
|
||||
punpckldq 56(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,384(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1792(%ebx)
|
||||
|
||||
/// Final section (tfcos36+16 only, no COS9 factors):
///   mm4 = (%eax) - 16(%eax) + 32(%eax) - 48(%eax) + 64(%eax)
/// then the usual pattern: writes 52/16(%ecx) and 512/1664(%ebx).
movq (%eax),%mm4
|
||||
movq 16(%eax),%mm3
|
||||
pfsub %mm3,%mm4
|
||||
movq 32(%eax),%mm3
|
||||
pfadd %mm3,%mm4
|
||||
movq 48(%eax),%mm3
|
||||
pfsub %mm3,%mm4
|
||||
movq 64(%eax),%mm3
|
||||
pfadd %mm3,%mm4
|
||||
movq %mm7,%mm5
|
||||
punpckldq tfcos36+16,%mm5
|
||||
pfmul %mm5,%mm4
|
||||
movq %mm4,%mm5
|
||||
pfacc %mm5,%mm5
|
||||
movd 124(%edx),%mm6
|
||||
punpckldq 88(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd %mm5,52(%ecx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,16(%ecx)
|
||||
movq %mm4,%mm6
|
||||
punpckldq %mm6,%mm5
|
||||
pfsub %mm6,%mm5
|
||||
punpckhdq %mm5,%mm5
|
||||
movd 16(%edx),%mm6
|
||||
punpckldq 52(%edx),%mm6
|
||||
pfmul %mm6,%mm5
|
||||
movd 16(%esi),%mm6
|
||||
punpckldq 52(%esi),%mm6
|
||||
pfadd %mm6,%mm5
|
||||
movd %mm5,512(%ebx)
|
||||
psrlq $32,%mm5
|
||||
movd %mm5,1664(%ebx)
|
||||
|
||||
/// Epilogue: femms again, then pop %ebx/%esi in reverse push order
/// (%esp is still %ebp-128) and tear down the frame.
femms
|
||||
popl %ebx
|
||||
popl %esi
|
||||
movl %ebp,%esp
|
||||
popl %ebp
|
||||
ret
|
Loading…
Reference in New Issue
Block a user