
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.

Neither variant is valid C99 syntax, but __asm__ is the most portable variant.


git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@27788 b3059339-0415-0410-9bf9-f77b7e298cf2
diego 2008-10-16 18:59:27 +00:00
parent 629db77d89
commit 6b52a2e974
43 changed files with 238 additions and 243 deletions
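
For context, the following is a minimal illustrative sketch of the substitution applied throughout the files below; the wrapper name read_tsc_example() is hypothetical, while the before/after lines mirror the read_tsc() change in the first hunk. GCC disables the plain asm keyword in strict ISO modes such as -std=c99, but the reserved spellings __asm__ and __volatile__ remain available there, and per the commit message __asm__ is the most portable of the accepted variants.

    #include <stdint.h>

    #if defined(__i386__)
    static inline uint64_t read_tsc_example(void)
    {
        uint64_t tsc;
        /* was: __asm __volatile ("rdtsc" : "=A"(tsc) : : "memory"); */
        /* now: reserved spellings, accepted even under -std=c99 */
        __asm__ __volatile__ ("rdtsc" : "=A"(tsc) : : "memory"); /* "=A" = edx:eax on i386 */
        return tsc;
    }
    #endif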

View File

@ -86,7 +86,7 @@ static unsigned int GetTimer(){
static inline unsigned long long int read_tsc( void ) static inline unsigned long long int read_tsc( void )
{ {
unsigned long long int retval; unsigned long long int retval;
__asm __volatile ("rdtsc":"=A"(retval)::"memory"); __asm__ __volatile ("rdtsc":"=A"(retval)::"memory");
return retval; return retval;
} }

configure
View File

@ -2124,8 +2124,8 @@ EOF
cat > $TMPC << EOF cat > $TMPC << EOF
int main(void) { int main(void) {
unsigned long ver, mask; unsigned long ver, mask;
asm ("implver %0" : "=r" (ver)); __asm__ ("implver %0" : "=r" (ver));
asm ("amask %1, %0" : "=r" (mask) : "r" (-1)); __asm__ ("amask %1, %0" : "=r" (mask) : "r" (-1));
printf("%ld-%x\n", ver, ~mask); printf("%ld-%x\n", ver, ~mask);
return 0; return 0;
} }
@ -2374,7 +2374,7 @@ echocheck ".align is a power of two"
if test "$_asmalign_pot" = auto ; then if test "$_asmalign_pot" = auto ; then
_asmalign_pot=no _asmalign_pot=no
cat > $TMPC << EOF cat > $TMPC << EOF
int main(void) { asm (".align 3"); return 0; } int main(void) { __asm__ (".align 3"); return 0; }
EOF EOF
cc_check && _asmalign_pot=yes cc_check && _asmalign_pot=yes
fi fi

View File

@ -85,14 +85,14 @@ static void
do_cpuid(unsigned int ax, unsigned int *p) do_cpuid(unsigned int ax, unsigned int *p)
{ {
#if 0 #if 0
__asm __volatile( __asm__ __volatile(
"cpuid;" "cpuid;"
: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
: "0" (ax) : "0" (ax)
); );
#else #else
// code from libavcodec: // code from libavcodec:
__asm __volatile __asm__ __volatile__
("mov %%"REG_b", %%"REG_S"\n\t" ("mov %%"REG_b", %%"REG_S"\n\t"
"cpuid\n\t" "cpuid\n\t"
"xchg %%"REG_b", %%"REG_S "xchg %%"REG_b", %%"REG_S
@ -400,7 +400,7 @@ static void check_os_katmai_support( void )
if ( gCpuCaps.hasSSE ) { if ( gCpuCaps.hasSSE ) {
mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " ); mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
__asm __volatile ("xorps %xmm0, %xmm0"); __asm__ __volatile ("xorps %xmm0, %xmm0");
SetUnhandledExceptionFilter(exc_fil); SetUnhandledExceptionFilter(exc_fil);
mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" ); mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
} }
@ -409,7 +409,7 @@ static void check_os_katmai_support( void )
if ( gCpuCaps.hasSSE ) { if ( gCpuCaps.hasSSE ) {
mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " ); mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
DosSetExceptionHandler( &RegRec ); DosSetExceptionHandler( &RegRec );
__asm __volatile ("xorps %xmm0, %xmm0"); __asm__ __volatile ("xorps %xmm0, %xmm0");
DosUnsetExceptionHandler( &RegRec ); DosUnsetExceptionHandler( &RegRec );
mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" ); mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
} }
@ -432,8 +432,8 @@ static void check_os_katmai_support( void )
if ( gCpuCaps.hasSSE ) { if ( gCpuCaps.hasSSE ) {
mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " ); mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
// __asm __volatile ("xorps %%xmm0, %%xmm0"); // __asm__ __volatile ("xorps %%xmm0, %%xmm0");
__asm __volatile ("xorps %xmm0, %xmm0"); __asm__ __volatile ("xorps %xmm0, %xmm0");
mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" ); mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
} }
@ -532,7 +532,7 @@ void GetCpuCaps( CpuCaps *caps)
} else { } else {
canjump = 1; canjump = 1;
asm volatile ("mtspr 256, %0\n\t" __asm__ volatile ("mtspr 256, %0\n\t"
"vand %%v0, %%v0, %%v0" "vand %%v0, %%v0, %%v0"
: :
: "r" (-1)); : "r" (-1));

View File

@ -49,9 +49,9 @@ cpuid(int func) {
cpuid_regs_t regs; cpuid_regs_t regs;
#define CPUID ".byte 0x0f, 0xa2; " #define CPUID ".byte 0x0f, 0xa2; "
#ifdef __x86_64__ #ifdef __x86_64__
asm("mov %%rbx, %%rsi\n\t" __asm__("mov %%rbx, %%rsi\n\t"
#else #else
asm("mov %%ebx, %%esi\n\t" __asm__("mov %%ebx, %%esi\n\t"
#endif #endif
CPUID"\n\t" CPUID"\n\t"
#ifdef __x86_64__ #ifdef __x86_64__
@ -70,7 +70,7 @@ rdtsc(void)
{ {
uint64_t i; uint64_t i;
#define RDTSC ".byte 0x0f, 0x31; " #define RDTSC ".byte 0x0f, 0x31; "
asm volatile (RDTSC : "=A"(i) : ); __asm__ volatile (RDTSC : "=A"(i) : );
return i; return i;
} }

View File

@ -129,7 +129,7 @@ static int swap_fourcc __initdata = 0;
static inline double FastSin(double x) static inline double FastSin(double x)
{ {
register double res; register double res;
__asm __volatile("fsin":"=t"(res):"0"(x)); __asm__ __volatile("fsin":"=t"(res):"0"(x));
return res; return res;
} }
#undef sin #undef sin
@ -138,7 +138,7 @@ static inline double FastSin(double x)
static inline double FastCos(double x) static inline double FastCos(double x)
{ {
register double res; register double res;
__asm __volatile("fcos":"=t"(res):"0"(x)); __asm__ __volatile("fcos":"=t"(res):"0"(x));
return res; return res;
} }
#undef cos #undef cos

View File

@ -689,7 +689,7 @@ void upmix_C (sample_t * samples, int acmod, int output)
#if defined(ARCH_X86) || defined(ARCH_X86_64) #if defined(ARCH_X86) || defined(ARCH_X86_64)
static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %2, %%xmm7 \n\t" "movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -712,7 +712,7 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
static void mix3to1_SSE (sample_t * samples, sample_t bias) static void mix3to1_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -733,7 +733,7 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias)
static void mix4to1_SSE (sample_t * samples, sample_t bias) static void mix4to1_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -755,7 +755,7 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias)
static void mix5to1_SSE (sample_t * samples, sample_t bias) static void mix5to1_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -778,7 +778,7 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias)
static void mix3to2_SSE (sample_t * samples, sample_t bias) static void mix3to2_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -801,7 +801,7 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias)
static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %2, %%xmm7 \n\t" "movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -824,7 +824,7 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
static void mix21toS_SSE (sample_t * samples, sample_t bias) static void mix21toS_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -848,7 +848,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias)
static void mix31to2_SSE (sample_t * samples, sample_t bias) static void mix31to2_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -872,7 +872,7 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias)
static void mix31toS_SSE (sample_t * samples, sample_t bias) static void mix31toS_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -898,7 +898,7 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias)
static void mix22toS_SSE (sample_t * samples, sample_t bias) static void mix22toS_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -923,7 +923,7 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias)
static void mix32to2_SSE (sample_t * samples, sample_t bias) static void mix32to2_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -947,7 +947,7 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias)
static void mix32toS_SSE (sample_t * samples, sample_t bias) static void mix32toS_SSE (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %1, %%xmm7 \n\t" "movlps %1, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -974,7 +974,7 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias)
static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movlps %2, %%xmm7 \n\t" "movlps %2, %%xmm7 \n\t"
"shufps $0x00, %%xmm7, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -997,7 +997,7 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
static void zero_MMX(sample_t * samples) static void zero_MMX(sample_t * samples)
{ {
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
ASMALIGN(4) ASMALIGN(4)
@ -1223,7 +1223,7 @@ static void upmix_MMX (sample_t * samples, int acmod, int output)
static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %2, %%mm7 \n\t" "movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1254,7 +1254,7 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
static void mix3to1_3dnow (sample_t * samples, sample_t bias) static void mix3to1_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1281,7 +1281,7 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias)
static void mix4to1_3dnow (sample_t * samples, sample_t bias) static void mix4to1_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1310,7 +1310,7 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias)
static void mix5to1_3dnow (sample_t * samples, sample_t bias) static void mix5to1_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1341,7 +1341,7 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias)
static void mix3to2_3dnow (sample_t * samples, sample_t bias) static void mix3to2_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1372,7 +1372,7 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias)
static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %2, %%mm7 \n\t" "movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1403,7 +1403,7 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
static void mix21toS_3dnow (sample_t * samples, sample_t bias) static void mix21toS_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1436,7 +1436,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias)
static void mix31to2_3dnow (sample_t * samples, sample_t bias) static void mix31to2_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1469,7 +1469,7 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias)
static void mix31toS_3dnow (sample_t * samples, sample_t bias) static void mix31toS_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1506,7 +1506,7 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias)
static void mix22toS_3dnow (sample_t * samples, sample_t bias) static void mix22toS_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1541,7 +1541,7 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias)
static void mix32to2_3dnow (sample_t * samples, sample_t bias) static void mix32to2_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %1, %%mm7 \n\t" "movd %1, %%mm7 \n\t"
"punpckldq %1, %%mm7 \n\t" "punpckldq %1, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1575,7 +1575,7 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias)
/* todo: should be optimized better */ /* todo: should be optimized better */
static void mix32toS_3dnow (sample_t * samples, sample_t bias) static void mix32toS_3dnow (sample_t * samples, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
@ -1614,7 +1614,7 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias)
static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
{ {
asm volatile( __asm__ volatile(
"movd %2, %%mm7 \n\t" "movd %2, %%mm7 \n\t"
"punpckldq %2, %%mm7 \n\t" "punpckldq %2, %%mm7 \n\t"
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
@ -1782,7 +1782,7 @@ static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t b
memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
break; break;
} }
__asm __volatile("femms":::"memory"); __asm__ volatile("femms":::"memory");
} }
#endif // ARCH_X86 || ARCH_X86_64 #endif // ARCH_X86 || ARCH_X86_64

View File

@ -750,7 +750,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
/* Pre IFFT complex multiply plus IFFT cmplx conjugate */ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
/* Bit reversed shuffling */ /* Bit reversed shuffling */
asm volatile( __asm__ volatile(
"xor %%"REG_S", %%"REG_S" \n\t" "xor %%"REG_S", %%"REG_S" \n\t"
"lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
"mov $1008, %%"REG_D" \n\t" "mov $1008, %%"REG_D" \n\t"
@ -810,7 +810,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
/* 1. iteration */ /* 1. iteration */
// Note w[0][0]={1,0} // Note w[0][0]={1,0}
asm volatile( __asm__ volatile(
"xorps %%xmm1, %%xmm1 \n\t" "xorps %%xmm1, %%xmm1 \n\t"
"xorps %%xmm2, %%xmm2 \n\t" "xorps %%xmm2, %%xmm2 \n\t"
"mov %0, %%"REG_S" \n\t" "mov %0, %%"REG_S" \n\t"
@ -832,7 +832,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
/* 2. iteration */ /* 2. iteration */
// Note w[1]={{1,0}, {0,-1}} // Note w[1]={{1,0}, {0,-1}}
asm volatile( __asm__ volatile(
"movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
"mov %0, %%"REG_S" \n\t" "mov %0, %%"REG_S" \n\t"
ASMALIGN(4) ASMALIGN(4)
@ -860,7 +860,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
Note sseW2+32={0,0,-sqrt(2),-sqrt(2)) Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
*/ */
asm volatile( __asm__ volatile(
"movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
"movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
"xorps %%xmm5, %%xmm5 \n\t" "xorps %%xmm5, %%xmm5 \n\t"
@ -905,7 +905,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
two_m_plus_one = two_m<<1; two_m_plus_one = two_m<<1;
two_m_plus_one_shl3 = (two_m_plus_one<<3); two_m_plus_one_shl3 = (two_m_plus_one<<3);
buf_offset = buf+128; buf_offset = buf+128;
asm volatile( __asm__ volatile(
"mov %0, %%"REG_S" \n\t" "mov %0, %%"REG_S" \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
@ -937,7 +937,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
} }
/* Post IFFT complex multiply plus IFFT complex conjugate*/ /* Post IFFT complex multiply plus IFFT complex conjugate*/
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
@ -960,7 +960,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
window_ptr = a52_imdct_window; window_ptr = a52_imdct_window;
/* Window and convert to real valued signal */ /* Window and convert to real valued signal */
asm volatile( __asm__ volatile(
"xor %%"REG_D", %%"REG_D" \n\t" // 0 "xor %%"REG_D", %%"REG_D" \n\t" // 0
"xor %%"REG_S", %%"REG_S" \n\t" // 0 "xor %%"REG_S", %%"REG_S" \n\t" // 0
"movss %3, %%xmm2 \n\t" // bias "movss %3, %%xmm2 \n\t" // bias
@ -987,7 +987,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
delay_ptr+=128; delay_ptr+=128;
// window_ptr+=128; // window_ptr+=128;
asm volatile( __asm__ volatile(
"mov $1024, %%"REG_D" \n\t" // 512 "mov $1024, %%"REG_D" \n\t" // 512
"xor %%"REG_S", %%"REG_S" \n\t" // 0 "xor %%"REG_S", %%"REG_S" \n\t" // 0
"movss %3, %%xmm2 \n\t" // bias "movss %3, %%xmm2 \n\t" // bias
@ -1016,7 +1016,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
/* The trailing edge of the window goes into the delay line */ /* The trailing edge of the window goes into the delay line */
delay_ptr = delay; delay_ptr = delay;
asm volatile( __asm__ volatile(
"xor %%"REG_D", %%"REG_D" \n\t" // 0 "xor %%"REG_D", %%"REG_D" \n\t" // 0
"xor %%"REG_S", %%"REG_S" \n\t" // 0 "xor %%"REG_S", %%"REG_S" \n\t" // 0
ASMALIGN(4) ASMALIGN(4)
@ -1038,7 +1038,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
delay_ptr+=128; delay_ptr+=128;
// window_ptr-=128; // window_ptr-=128;
asm volatile( __asm__ volatile(
"mov $1024, %%"REG_D" \n\t" // 1024 "mov $1024, %%"REG_D" \n\t" // 1024
"xor %%"REG_S", %%"REG_S" \n\t" // 0 "xor %%"REG_S", %%"REG_S" \n\t" // 0
ASMALIGN(4) ASMALIGN(4)

View File

@ -431,7 +431,7 @@ imdct_do_512_3dnow
*/ */
FFT_128P_3DNOW (&buf[0]); FFT_128P_3DNOW (&buf[0]);
// asm volatile ("femms \n\t":::"memory"); // __asm__ volatile ("femms \n\t":::"memory");
/* Post IFFT complex multiply plus IFFT complex conjugate*/ /* Post IFFT complex multiply plus IFFT complex conjugate*/
#if 1 #if 1
@ -489,7 +489,7 @@ imdct_do_512_3dnow
/* Window and convert to real valued signal */ /* Window and convert to real valued signal */
#if 1 #if 1
asm volatile ( __asm__ volatile (
"movd (%0), %%mm3 \n\t" "movd (%0), %%mm3 \n\t"
"punpckldq %%mm3, %%mm3 \n\t" "punpckldq %%mm3, %%mm3 \n\t"
:: "r" (&bias) :: "r" (&bias)

View File

@ -1,6 +1,6 @@
--- include/a52.h 2006-06-12 15:04:57.000000000 +0200 --- include/a52.h 2006-06-12 15:04:57.000000000 +0200
+++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200 +++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200
@@ -59,4 +63,9 @@ @@ -63,4 +63,9 @@
int a52_block (a52_state_t * state); int a52_block (a52_state_t * state);
void a52_free (a52_state_t * state); void a52_free (a52_state_t * state);
@ -12,7 +12,7 @@
#endif /* A52_H */ #endif /* A52_H */
--- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200 --- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200
+++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200 +++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200
@@ -103,18 +107,34 @@ @@ -107,18 +107,34 @@
#define DELTA_BIT_NONE (2) #define DELTA_BIT_NONE (2)
#define DELTA_BIT_RESERVED (3) #define DELTA_BIT_RESERVED (3)
@ -52,7 +52,7 @@
+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias); +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
--- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200 --- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200
+++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200 +++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200
@@ -31,6 +35,10 @@ @@ -35,6 +35,10 @@
#define BUFFER_SIZE 4096 #define BUFFER_SIZE 4096
@ -63,7 +63,7 @@
void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf) void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
{ {
int align; int align;
@@ -38,6 +46,9 @@ @@ -42,6 +46,9 @@
align = (long)buf & 3; align = (long)buf & 3;
state->buffer_start = (uint32_t *) (buf - align); state->buffer_start = (uint32_t *) (buf - align);
state->bits_left = 0; state->bits_left = 0;
@ -75,7 +75,7 @@
--- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200 --- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200
+++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200 +++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200
@@ -21,6 +25,42 @@ @@ -25,6 +25,42 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/ */
@ -118,7 +118,7 @@
/* (stolen from the kernel) */ /* (stolen from the kernel) */
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
@@ -28,7 +74,7 @@ @@ -32,7 +74,7 @@
#else #else
@ -127,7 +127,7 @@
# define swab32(x) __i386_swab32(x) # define swab32(x) __i386_swab32(x)
static inline const uint32_t __i386_swab32(uint32_t x) static inline const uint32_t __i386_swab32(uint32_t x)
@@ -39,19 +85,34 @@ @@ -43,19 +85,34 @@
# else # else
@ -166,7 +166,7 @@
uint32_t result; uint32_t result;
if (num_bits < state->bits_left) { if (num_bits < state->bits_left) {
@@ -61,10 +122,29 @@ @@ -65,10 +122,29 @@
} }
return a52_bitstream_get_bh (state, num_bits); return a52_bitstream_get_bh (state, num_bits);
@ -196,7 +196,7 @@
int32_t result; int32_t result;
if (num_bits < state->bits_left) { if (num_bits < state->bits_left) {
@@ -74,4 +154,5 @@ @@ -78,4 +154,5 @@
} }
return a52_bitstream_get_bh_2 (state, num_bits); return a52_bitstream_get_bh_2 (state, num_bits);
@ -204,7 +204,7 @@
} }
--- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200 --- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200
+++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200 +++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200
@@ -19,18 +23,46 @@ @@ -23,18 +23,46 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -251,7 +251,7 @@
int a52_downmix_init (int input, int flags, sample_t * level, int a52_downmix_init (int input, int flags, sample_t * level,
sample_t clev, sample_t slev) sample_t clev, sample_t slev)
{ {
@@ -447,7 +479,7 @@ @@ -451,7 +479,7 @@
samples[i] = 0; samples[i] = 0;
} }
@ -260,7 +260,7 @@
sample_t clev, sample_t slev) sample_t clev, sample_t slev)
{ {
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
@@ -559,7 +591,7 @@ @@ -563,7 +591,7 @@
break; break;
case CONVERT (A52_3F2R, A52_2F1R): case CONVERT (A52_3F2R, A52_2F1R):
@ -269,7 +269,7 @@
move2to1 (samples + 768, samples + 512, bias); move2to1 (samples + 768, samples + 512, bias);
break; break;
@@ -583,12 +615,12 @@ @@ -587,12 +615,12 @@
break; break;
case CONVERT (A52_3F1R, A52_3F2R): case CONVERT (A52_3F1R, A52_3F2R):
@ -284,7 +284,7 @@
{ {
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
@@ -653,3 +685,1104 @@ @@ -657,3 +685,1104 @@
goto mix_31to21; goto mix_31to21;
} }
} }
@ -292,7 +292,7 @@
+#if defined(ARCH_X86) || defined(ARCH_X86_64) +#if defined(ARCH_X86) || defined(ARCH_X86_64)
+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %2, %%xmm7 \n\t" + "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -315,7 +315,7 @@
+ +
+static void mix3to1_SSE (sample_t * samples, sample_t bias) +static void mix3to1_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -336,7 +336,7 @@
+ +
+static void mix4to1_SSE (sample_t * samples, sample_t bias) +static void mix4to1_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -358,7 +358,7 @@
+ +
+static void mix5to1_SSE (sample_t * samples, sample_t bias) +static void mix5to1_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -381,7 +381,7 @@
+ +
+static void mix3to2_SSE (sample_t * samples, sample_t bias) +static void mix3to2_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -404,7 +404,7 @@
+ +
+static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %2, %%xmm7 \n\t" + "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -427,7 +427,7 @@
+ +
+static void mix21toS_SSE (sample_t * samples, sample_t bias) +static void mix21toS_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -451,7 +451,7 @@
+ +
+static void mix31to2_SSE (sample_t * samples, sample_t bias) +static void mix31to2_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -475,7 +475,7 @@
+ +
+static void mix31toS_SSE (sample_t * samples, sample_t bias) +static void mix31toS_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -501,7 +501,7 @@
+ +
+static void mix22toS_SSE (sample_t * samples, sample_t bias) +static void mix22toS_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -526,7 +526,7 @@
+ +
+static void mix32to2_SSE (sample_t * samples, sample_t bias) +static void mix32to2_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -550,7 +550,7 @@
+ +
+static void mix32toS_SSE (sample_t * samples, sample_t bias) +static void mix32toS_SSE (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %1, %%xmm7 \n\t" + "movlps %1, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -577,7 +577,7 @@
+ +
+static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movlps %2, %%xmm7 \n\t" + "movlps %2, %%xmm7 \n\t"
+ "shufps $0x00, %%xmm7, %%xmm7 \n\t" + "shufps $0x00, %%xmm7, %%xmm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -600,7 +600,7 @@
+ +
+static void zero_MMX(sample_t * samples) +static void zero_MMX(sample_t * samples)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
+ "pxor %%mm0, %%mm0 \n\t" + "pxor %%mm0, %%mm0 \n\t"
+ ASMALIGN(4) + ASMALIGN(4)
@ -826,7 +826,7 @@
+ +
+static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) +static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %2, %%mm7 \n\t" + "movd %2, %%mm7 \n\t"
+ "punpckldq %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -857,7 +857,7 @@
+ +
+static void mix3to1_3dnow (sample_t * samples, sample_t bias) +static void mix3to1_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -884,7 +884,7 @@
+ +
+static void mix4to1_3dnow (sample_t * samples, sample_t bias) +static void mix4to1_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -913,7 +913,7 @@
+ +
+static void mix5to1_3dnow (sample_t * samples, sample_t bias) +static void mix5to1_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -944,7 +944,7 @@
+ +
+static void mix3to2_3dnow (sample_t * samples, sample_t bias) +static void mix3to2_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -975,7 +975,7 @@
+ +
+static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) +static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %2, %%mm7 \n\t" + "movd %2, %%mm7 \n\t"
+ "punpckldq %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1006,7 +1006,7 @@
+ +
+static void mix21toS_3dnow (sample_t * samples, sample_t bias) +static void mix21toS_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1039,7 +1039,7 @@
+ +
+static void mix31to2_3dnow (sample_t * samples, sample_t bias) +static void mix31to2_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1072,7 +1072,7 @@
+ +
+static void mix31toS_3dnow (sample_t * samples, sample_t bias) +static void mix31toS_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1109,7 +1109,7 @@
+ +
+static void mix22toS_3dnow (sample_t * samples, sample_t bias) +static void mix22toS_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1144,7 +1144,7 @@
+ +
+static void mix32to2_3dnow (sample_t * samples, sample_t bias) +static void mix32to2_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %1, %%mm7 \n\t" + "movd %1, %%mm7 \n\t"
+ "punpckldq %1, %%mm7 \n\t" + "punpckldq %1, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1178,7 +1178,7 @@
+/* todo: should be optimized better */ +/* todo: should be optimized better */
+static void mix32toS_3dnow (sample_t * samples, sample_t bias) +static void mix32toS_3dnow (sample_t * samples, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
+ ASMALIGN(4) + ASMALIGN(4)
+ "1: \n\t" + "1: \n\t"
@ -1217,7 +1217,7 @@
+ +
+static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) +static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
+{ +{
+ asm volatile( + __asm__ volatile(
+ "movd %2, %%mm7 \n\t" + "movd %2, %%mm7 \n\t"
+ "punpckldq %2, %%mm7 \n\t" + "punpckldq %2, %%mm7 \n\t"
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
@ -1385,13 +1385,13 @@
+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); + memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+ break; + break;
+ } + }
+ __asm __volatile("femms":::"memory"); + __asm__ volatile("femms":::"memory");
+} +}
+ +
+#endif // ARCH_X86 || ARCH_X86_64 +#endif // ARCH_X86 || ARCH_X86_64
--- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100 --- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100
+++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100 +++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100
@@ -22,6 +26,11 @@ @@ -26,6 +26,11 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@ -1403,7 +1403,7 @@
*/ */
#include "config.h" #include "config.h"
@@ -39,12 +48,49 @@ @@ -43,12 +48,49 @@
#include "a52.h" #include "a52.h"
#include "a52_internal.h" #include "a52_internal.h"
#include "mm_accel.h" #include "mm_accel.h"
@ -1453,7 +1453,7 @@
static uint8_t fftorder[] = { static uint8_t fftorder[] = {
0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
@@ -56,6 +102,40 @@ @@ -60,6 +102,40 @@
6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
}; };
@ -1494,7 +1494,7 @@
/* Root values for IFFT */ /* Root values for IFFT */
static sample_t roots16[3]; static sample_t roots16[3];
static sample_t roots32[7]; static sample_t roots32[7];
@@ -241,7 +321,7 @@ @@ -245,7 +321,7 @@
ifft_pass (buf, roots128 - 32, 32); ifft_pass (buf, roots128 - 32, 32);
} }
@ -1503,7 +1503,7 @@
{ {
int i, k; int i, k;
sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
@@ -285,6 +365,701 @@ @@ -289,6 +365,701 @@
} }
} }
@ -1892,7 +1892,7 @@
+ +
+ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+ /* Bit reversed shuffling */ + /* Bit reversed shuffling */
+ asm volatile( + __asm__ volatile(
+ "xor %%"REG_S", %%"REG_S" \n\t" + "xor %%"REG_S", %%"REG_S" \n\t"
+ "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" + "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
+ "mov $1008, %%"REG_D" \n\t" + "mov $1008, %%"REG_D" \n\t"
@ -1952,7 +1952,7 @@
+ +
+ /* 1. iteration */ + /* 1. iteration */
+ // Note w[0][0]={1,0} + // Note w[0][0]={1,0}
+ asm volatile( + __asm__ volatile(
+ "xorps %%xmm1, %%xmm1 \n\t" + "xorps %%xmm1, %%xmm1 \n\t"
+ "xorps %%xmm2, %%xmm2 \n\t" + "xorps %%xmm2, %%xmm2 \n\t"
+ "mov %0, %%"REG_S" \n\t" + "mov %0, %%"REG_S" \n\t"
@ -1974,7 +1974,7 @@
+ +
+ /* 2. iteration */ + /* 2. iteration */
+ // Note w[1]={{1,0}, {0,-1}} + // Note w[1]={{1,0}, {0,-1}}
+ asm volatile( + __asm__ volatile(
+ "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 + "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
+ "mov %0, %%"REG_S" \n\t" + "mov %0, %%"REG_S" \n\t"
+ ASMALIGN(4) + ASMALIGN(4)
@ -2002,7 +2002,7 @@
+ Note sseW2+32={0,0,-sqrt(2),-sqrt(2)) + Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
+ Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) + Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
+*/ +*/
+ asm volatile( + __asm__ volatile(
+ "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" + "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
+ "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" + "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
+ "xorps %%xmm5, %%xmm5 \n\t" + "xorps %%xmm5, %%xmm5 \n\t"
@ -2047,7 +2047,7 @@
+ two_m_plus_one = two_m<<1; + two_m_plus_one = two_m<<1;
+ two_m_plus_one_shl3 = (two_m_plus_one<<3); + two_m_plus_one_shl3 = (two_m_plus_one<<3);
+ buf_offset = buf+128; + buf_offset = buf+128;
+ asm volatile( + __asm__ volatile(
+ "mov %0, %%"REG_S" \n\t" + "mov %0, %%"REG_S" \n\t"
+ ASMALIGN(4) + ASMALIGN(4)
+ "1: \n\t" + "1: \n\t"
@ -2079,7 +2079,7 @@
+ } + }
+ +
+ /* Post IFFT complex multiply plus IFFT complex conjugate*/ + /* Post IFFT complex multiply plus IFFT complex conjugate*/
+ asm volatile( + __asm__ volatile(
+ "mov $-1024, %%"REG_S" \n\t" + "mov $-1024, %%"REG_S" \n\t"
+ ASMALIGN(4) + ASMALIGN(4)
+ "1: \n\t" + "1: \n\t"
@ -2102,7 +2102,7 @@
+ window_ptr = a52_imdct_window; + window_ptr = a52_imdct_window;
+ +
+ /* Window and convert to real valued signal */ + /* Window and convert to real valued signal */
+ asm volatile( + __asm__ volatile(
+ "xor %%"REG_D", %%"REG_D" \n\t" // 0 + "xor %%"REG_D", %%"REG_D" \n\t" // 0
+ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0
+ "movss %3, %%xmm2 \n\t" // bias + "movss %3, %%xmm2 \n\t" // bias
@ -2129,7 +2129,7 @@
+ delay_ptr+=128; + delay_ptr+=128;
+// window_ptr+=128; +// window_ptr+=128;
+ +
+ asm volatile( + __asm__ volatile(
+ "mov $1024, %%"REG_D" \n\t" // 512 + "mov $1024, %%"REG_D" \n\t" // 512
+ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0
+ "movss %3, %%xmm2 \n\t" // bias + "movss %3, %%xmm2 \n\t" // bias
@ -2158,7 +2158,7 @@
+ /* The trailing edge of the window goes into the delay line */ + /* The trailing edge of the window goes into the delay line */
+ delay_ptr = delay; + delay_ptr = delay;
+ +
+ asm volatile( + __asm__ volatile(
+ "xor %%"REG_D", %%"REG_D" \n\t" // 0 + "xor %%"REG_D", %%"REG_D" \n\t" // 0
+ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0
+ ASMALIGN(4) + ASMALIGN(4)
@ -2180,7 +2180,7 @@
+ delay_ptr+=128; + delay_ptr+=128;
+// window_ptr-=128; +// window_ptr-=128;
+ +
+ asm volatile( + __asm__ volatile(
+ "mov $1024, %%"REG_D" \n\t" // 1024 + "mov $1024, %%"REG_D" \n\t" // 1024
+ "xor %%"REG_S", %%"REG_S" \n\t" // 0 + "xor %%"REG_S", %%"REG_S" \n\t" // 0
+ ASMALIGN(4) + ASMALIGN(4)
@ -2205,7 +2205,7 @@
void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias) void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
{ {
int i, k; int i, k;
@@ -364,7 +1145,7 @@ @@ -368,7 +1145,7 @@
void a52_imdct_init (uint32_t mm_accel) void a52_imdct_init (uint32_t mm_accel)
{ {
@ -2214,7 +2214,7 @@
double sum; double sum;
/* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
@@ -416,6 +1197,99 @@ @@ -420,6 +1197,99 @@
post2[i].real = cos ((M_PI / 128) * (i + 0.5)); post2[i].real = cos ((M_PI / 128) * (i + 0.5));
post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
} }
@ -2314,7 +2314,7 @@
#ifdef LIBA52_DJBFFT #ifdef LIBA52_DJBFFT
if (mm_accel & MM_ACCEL_DJBFFT) { if (mm_accel & MM_ACCEL_DJBFFT) {
@@ -426,7 +1300,5 @@ @@ -430,7 +1300,5 @@
#endif #endif
{ {
fprintf (stderr, "No accelerated IMDCT transform found\n"); fprintf (stderr, "No accelerated IMDCT transform found\n");
@ -2324,7 +2324,7 @@
} }
--- include/mm_accel.h 2006-06-12 15:05:00.000000000 +0200 --- include/mm_accel.h 2006-06-12 15:05:00.000000000 +0200
+++ liba52/mm_accel.h 2006-06-05 02:23:04.000000000 +0200 +++ liba52/mm_accel.h 2006-06-05 02:23:04.000000000 +0200
@@ -30,7 +34,12 @@ @@ -34,7 +34,12 @@
/* x86 accelerations */ /* x86 accelerations */
#define MM_ACCEL_X86_MMX 0x80000000 #define MM_ACCEL_X86_MMX 0x80000000
#define MM_ACCEL_X86_3DNOW 0x40000000 #define MM_ACCEL_X86_3DNOW 0x40000000

View File

@ -38,7 +38,7 @@ static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF000
static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-512, %%"REG_S" \n\t" "mov $-512, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"movq "MANGLE(wm1100)", %%mm3 \n\t" "movq "MANGLE(wm1100)", %%mm3 \n\t"
@ -77,7 +77,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
#ifdef HAVE_SSE #ifdef HAVE_SSE
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"1: \n\t" "1: \n\t"
"cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
@ -93,7 +93,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
:: "r" (s16+512), "r" (f+256) :: "r" (s16+512), "r" (f+256)
:"%"REG_S, "memory" :"%"REG_S, "memory"
);*/ );*/
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t" "1: \n\t"
@ -123,7 +123,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
@ -177,7 +177,7 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t" "1: \n\t"
@ -228,7 +228,7 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t" "1: \n\t"
@ -287,7 +287,7 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
@ -327,7 +327,7 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
@ -365,7 +365,7 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
@ -405,7 +405,7 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t" // "pxor %%mm6, %%mm6 \n\t"
@ -451,7 +451,7 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f; int32_t * f = (int32_t *) _f;
asm volatile( __asm__ volatile(
"mov $-1024, %%"REG_S" \n\t" "mov $-1024, %%"REG_S" \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t" "movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t" // "pxor %%mm6, %%mm6 \n\t"

View File

@ -46,7 +46,7 @@ void mp_msg( int x, const char *format, ... ) // stub for cpudetect.c
static inline long long rdtsc() static inline long long rdtsc()
{ {
long long l; long long l;
asm volatile( "rdtsc\n\t" __asm__ volatile("rdtsc\n\t"
: "=A" (l) : "=A" (l)
); );
// printf("%d\n", int(l/1000)); // printf("%d\n", int(l/1000));

View File

@ -374,10 +374,10 @@ void *decode_video(sh_video_t *sh_video, unsigned char *start, int in_size,
// some codecs are broken, and doesn't restore MMX state :( // some codecs are broken, and doesn't restore MMX state :(
// it happens usually with broken/damaged files. // it happens usually with broken/damaged files.
if (gCpuCaps.has3DNow) { if (gCpuCaps.has3DNow) {
__asm __volatile ("femms\n\t":::"memory"); __asm__ __volatile ("femms\n\t":::"memory");
} }
else if (gCpuCaps.hasMMX) { else if (gCpuCaps.hasMMX) {
__asm __volatile ("emms\n\t":::"memory"); __asm__ __volatile ("emms\n\t":::"memory");
} }
#endif #endif

View File

@ -14,7 +14,7 @@
static int diff_y_mmx(unsigned char *a, unsigned char *b, int s) static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
{ {
int ret; int ret;
asm volatile ( __asm__ volatile (
"movl $4, %%ecx \n\t" "movl $4, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
@ -61,7 +61,7 @@ static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s) static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
{ {
int ret; int ret;
asm volatile ( __asm__ volatile (
"movl $4, %%ecx \n\t" "movl $4, %%ecx \n\t"
"pxor %%mm6, %%mm6 \n\t" "pxor %%mm6, %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
@ -150,7 +150,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
static int var_y_mmx(unsigned char *a, unsigned char *b, int s) static int var_y_mmx(unsigned char *a, unsigned char *b, int s)
{ {
int ret; int ret;
asm volatile ( __asm__ volatile (
"movl $3, %%ecx \n\t" "movl $3, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"

View File

@ -23,7 +23,7 @@ struct vf_priv_s {
static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
{ {
volatile short out[4]; volatile short out[4];
asm ( __asm__ (
"movl $8, %%ecx \n\t" "movl $8, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"

View File

@ -37,7 +37,7 @@ struct vf_priv_s
static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns) static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
{ {
volatile short out[4]; volatile short out[4];
asm ( __asm__ (
"movl $8, %%ecx \n\t" "movl $8, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"

View File

@ -44,7 +44,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast; contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
while (h--) { while (h--) {
asm volatile ( __asm__ volatile (
"movq (%5), %%mm3 \n\t" "movq (%5), %%mm3 \n\t"
"movq (%6), %%mm4 \n\t" "movq (%6), %%mm4 \n\t"
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
@ -82,7 +82,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
src += sstep; src += sstep;
dest += dstep; dest += dstep;
} }
asm volatile ( "emms \n\t" ::: "memory" ); __asm__ volatile ( "emms \n\t" ::: "memory" );
} }
#endif #endif

View File

@ -130,7 +130,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
dstep = dstride - w; dstep = dstride - w;
while (h-- > 0) { while (h-- > 0) {
asm volatile ( __asm__ volatile (
"movq (%5), %%mm3 \n\t" "movq (%5), %%mm3 \n\t"
"movq (%6), %%mm4 \n\t" "movq (%6), %%mm4 \n\t"
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
@ -170,7 +170,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
dst += dstep; dst += dstep;
} }
asm volatile ( "emms \n\t" ::: "memory" ); __asm__ volatile ( "emms \n\t" ::: "memory" );
} }
#endif #endif

View File

@ -368,13 +368,13 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise) #define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
#define BLOCK_METRICS_TEMPLATE() \ #define BLOCK_METRICS_TEMPLATE() \
asm volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \ __asm__ volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \
"pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \ "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \
); \ ); \
a -= as; \ a -= as; \
b -= bs; \ b -= bs; \
do { \ do { \
asm volatile( \ __asm__ volatile( \
"movq (%0,%2), %%mm0\n\t" \ "movq (%0,%2), %%mm0\n\t" \
"movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \ "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \
PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \ PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
@ -439,7 +439,7 @@ block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
static const unsigned long long ones = 0x0101010101010101ull; static const unsigned long long ones = 0x0101010101010101ull;
BLOCK_METRICS_TEMPLATE(); BLOCK_METRICS_TEMPLATE();
asm volatile("movq %%mm7, %0\n\temms" : "=m" (tm)); __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
get_block_stats(&tm, p, s); get_block_stats(&tm, p, s);
#endif #endif
return tm; return tm;
@ -471,7 +471,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
#ifdef DEBUG #ifdef DEBUG
struct frame_stats ts = *s; struct frame_stats ts = *s;
#endif #endif
asm volatile("prefetcht0 (%0,%2)\n\t" __asm__ volatile("prefetcht0 (%0,%2)\n\t"
"prefetcht0 (%1,%3)\n\t" : "prefetcht0 (%1,%3)\n\t" :
: "r" (a), "r" (b), : "r" (a), "r" (b),
"r" (prefetch_line * as), "r" (prefetch_line * bs)); "r" (prefetch_line * as), "r" (prefetch_line * bs));
@ -479,7 +479,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
BLOCK_METRICS_TEMPLATE(); BLOCK_METRICS_TEMPLATE();
s->num_blocks++; s->num_blocks++;
asm volatile( __asm__ volatile(
"movq %3, %%mm0\n\t" "movq %3, %%mm0\n\t"
"movq %%mm7, %%mm1\n\t" "movq %%mm7, %%mm1\n\t"
"psubusw %%mm0, %%mm1\n\t" "psubusw %%mm0, %%mm1\n\t"
@ -525,7 +525,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
s->interlaced_high += interlaced >> 16; s->interlaced_high += interlaced >> 16;
s->interlaced_low += interlaced; s->interlaced_low += interlaced;
} else { } else {
asm volatile( __asm__ volatile(
"pcmpeqw %%mm0, %%mm0\n\t" /* -1 */ "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
"psubw %%mm0, %%mm4\n\t" "psubw %%mm0, %%mm4\n\t"
"psubw %%mm0, %%mm5\n\t" "psubw %%mm0, %%mm5\n\t"
@ -539,7 +539,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
: "=m" (s->tiny), "=m" (s->low), "=m" (s->high) : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
); );
asm volatile( __asm__ volatile(
"pshufw $0, %2, %%mm0\n\t" "pshufw $0, %2, %%mm0\n\t"
"psubusw %%mm7, %%mm0\n\t" "psubusw %%mm7, %%mm0\n\t"
"pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */ "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
@ -556,7 +556,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
); );
} }
asm volatile( __asm__ volatile(
"movq %%mm7, (%1)\n\t" "movq %%mm7, (%1)\n\t"
PMAXUW((%0), %%mm7) PMAXUW((%0), %%mm7)
"movq %%mm7, (%0)\n\t" "movq %%mm7, (%0)\n\t"
@ -597,7 +597,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
#else #else
unsigned long len = (w+7) >> 3; unsigned long len = (w+7) >> 3;
int ret; int ret;
asm volatile ( __asm__ volatile (
"pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */ "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
"movd %0, %%mm7 \n\t" "movd %0, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t"
@ -607,7 +607,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
: "rm" (t) : "rm" (t)
); );
do { do {
asm volatile ( __asm__ volatile (
"movq (%0), %%mm0\n\t" "movq (%0), %%mm0\n\t"
"movq (%0,%3,2), %%mm1\n\t" "movq (%0,%3,2), %%mm1\n\t"
"movq %%mm0, (%2)\n\t" "movq %%mm0, (%2)\n\t"
@ -639,7 +639,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
dst += 8; dst += 8;
} while (--len); } while (--len);
asm volatile ("pxor %%mm7, %%mm7 \n\t" __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
"psadbw %%mm6, %%mm7 \n\t" "psadbw %%mm6, %%mm7 \n\t"
"movd %%mm7, %0 \n\t" "movd %%mm7, %0 \n\t"
"emms \n\t" "emms \n\t"

View File

@ -187,7 +187,7 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long sr
width = (width+7)&~7; width = (width+7)&~7;
dst_stride-=width; dst_stride-=width;
//src_stride=(src_stride-width)*2; //src_stride=(src_stride-width)*2;
asm volatile( __asm__ volatile(
"mov %5, %%"REG_d" \n\t" "mov %5, %%"REG_d" \n\t"
"mov %6, %%"REG_S" \n\t" "mov %6, %%"REG_S" \n\t"
"mov %7, %%"REG_D" \n\t" "mov %7, %%"REG_D" \n\t"
@ -255,7 +255,7 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s
width = (width+7)&~7; width = (width+7)&~7;
dst_stride-=width; dst_stride-=width;
//src_stride=(src_stride-width)*2; //src_stride=(src_stride-width)*2;
asm volatile( __asm__ volatile(
"mov %5, %%"REG_d" \n\t" "mov %5, %%"REG_d" \n\t"
"mov %6, %%"REG_S" \n\t" "mov %6, %%"REG_S" \n\t"
"mov %7, %%"REG_D" \n\t" "mov %7, %%"REG_D" \n\t"
@ -318,7 +318,7 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s
static void mul_thrmat_mmx(struct vf_priv_s *p, int q) static void mul_thrmat_mmx(struct vf_priv_s *p, int q)
{ {
uint64_t *adr=&p->threshold_mtx_noq[0]; uint64_t *adr=&p->threshold_mtx_noq[0];
asm volatile( __asm__ volatile(
"movd %0, %%mm7 \n\t" "movd %0, %%mm7 \n\t"
"add $8*8*2, %%"REG_D" \n\t" "add $8*8*2, %%"REG_D" \n\t"
"movq 0*8(%%"REG_S"), %%mm0 \n\t" "movq 0*8(%%"REG_S"), %%mm0 \n\t"
@ -558,10 +558,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image(vf,dmpi, pts); return vf_next_put_image(vf,dmpi, pts);
} }
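This emms/sfence epilogue before vf_next_put_image() recurs in several of the filters touched here. A hedged sketch of a helper that would bundle it (the helper itself is hypothetical; the guards and gCpuCaps flags are the ones used above):

static inline void mmx_epilogue(void)
{
#ifdef HAVE_MMX
    /* clear MMX state so later x87 code starts from a clean FPU */
    if (gCpuCaps.hasMMX)  __asm__ volatile ("emms");
#endif
#ifdef HAVE_MMX2
    /* order any non-temporal stores issued by the filter */
    if (gCpuCaps.hasMMX2) __asm__ volatile ("sfence");
#endif
}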
@ -868,7 +868,7 @@ static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int
static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt) static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; uint64_t __attribute__((aligned(8))) temps[4];
asm volatile( __asm__ volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
@ -1669,7 +1669,7 @@ static void row_idct_mmx (DCTELEM* workspace,
int16_t* output_adr, int output_stride, int cnt) int16_t* output_adr, int output_stride, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; uint64_t __attribute__((aligned(8))) temps[4];
asm volatile( __asm__ volatile(
"lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
"1: \n\t" "1: \n\t"
"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t" "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t"
@ -1935,7 +1935,7 @@ static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int
static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt) static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt)
{ {
uint64_t __attribute__((aligned(8))) temps[4]; uint64_t __attribute__((aligned(8))) temps[4];
asm volatile( __asm__ volatile(
"lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
"6: \n\t" "6: \n\t"
"movd (%%"REG_S"), %%mm0 \n\t" "movd (%%"REG_S"), %%mm0 \n\t"

View File

@ -37,7 +37,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
vinc = srcstride[2] - w/2; vinc = srcstride[2] - w/2;
for (h/=2; h; h--) { for (h/=2; h; h--) {
asm ( __asm__ (
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
@ -99,7 +99,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
v += vinc; v += vinc;
dst += dstinc; dst += dstinc;
} }
asm volatile ( "emms \n\t" ::: "memory" ); __asm__ volatile ( "emms \n\t" ::: "memory" );
} }
#endif #endif

View File

@ -61,7 +61,7 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y,
static void pack_nn_MMX(unsigned char *dst, unsigned char *y, static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w) unsigned char *u, unsigned char *v, int w)
{ {
asm volatile ("" __asm__ volatile (""
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
"movq (%0), %%mm1 \n\t" "movq (%0), %%mm1 \n\t"
@ -91,7 +91,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs) unsigned char *u, unsigned char *v, int w, int us, int vs)
{ {
asm volatile ("" __asm__ volatile (""
"push %%"REG_BP" \n\t" "push %%"REG_BP" \n\t"
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
"mov %6, %%"REG_BP" \n\t" "mov %6, %%"REG_BP" \n\t"
@ -199,7 +199,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
unsigned char *u, unsigned char *v, int w, int us, int vs) unsigned char *u, unsigned char *v, int w, int us, int vs)
{ {
asm volatile ("" __asm__ volatile (""
"push %%"REG_BP" \n\t" "push %%"REG_BP" \n\t"
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
"mov %6, %%"REG_BP" \n\t" "mov %6, %%"REG_BP" \n\t"

View File

@ -46,7 +46,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
int i; int i;
short out[24]; // output buffer for the partial metrics from the mmx code short out[24]; // output buffer for the partial metrics from the mmx code
asm ( __asm__ (
"movl $4, %%ecx \n\t" "movl $4, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" // 4 even difference sums "pxor %%mm4, %%mm4 \n\t" // 4 even difference sums
"pxor %%mm5, %%mm5 \n\t" // 4 odd difference sums "pxor %%mm5, %%mm5 \n\t" // 4 odd difference sums
@ -105,7 +105,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
m->o = out[4]+out[5]+out[6]+out[7]; m->o = out[4]+out[5]+out[6]+out[7];
m->d = m->e + m->o; m->d = m->e + m->o;
asm ( __asm__ (
// First loop to measure first four columns // First loop to measure first four columns
"movl $4, %%ecx \n\t" "movl $4, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t" // Past spacial noise "pxor %%mm4, %%mm4 \n\t" // Past spacial noise

View File

@ -150,7 +150,7 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int
long mmx_len= len&(~7); long mmx_len= len&(~7);
noise+=shift; noise+=shift;
asm volatile( __asm__ volatile(
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t" "pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t" "psllw $15, %%mm7 \n\t"
@ -179,7 +179,7 @@ static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int
long mmx_len= len&(~7); long mmx_len= len&(~7);
noise+=shift; noise+=shift;
asm volatile( __asm__ volatile(
"mov %3, %%"REG_a" \n\t" "mov %3, %%"REG_a" \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t" "pcmpeqb %%mm7, %%mm7 \n\t"
"psllw $15, %%mm7 \n\t" "psllw $15, %%mm7 \n\t"
@ -220,7 +220,7 @@ static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int le
static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){ static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){
long mmx_len= len&(~7); long mmx_len= len&(~7);
asm volatile( __asm__ volatile(
"mov %5, %%"REG_a" \n\t" "mov %5, %%"REG_a" \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
@ -357,10 +357,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
vf_clone_mpi_attributes(dmpi, mpi); vf_clone_mpi_attributes(dmpi, mpi);
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image(vf,dmpi, pts); return vf_next_put_image(vf,dmpi, pts);

View File

@ -157,7 +157,7 @@ static void dctB_c(DCTELEM *dst, DCTELEM *src){
#ifdef HAVE_MMX #ifdef HAVE_MMX
static void dctB_mmx(DCTELEM *dst, DCTELEM *src){ static void dctB_mmx(DCTELEM *dst, DCTELEM *src){
asm volatile ( __asm__ volatile (
"movq (%0), %%mm0 \n\t" "movq (%0), %%mm0 \n\t"
"movq 1*4*2(%0), %%mm1 \n\t" "movq 1*4*2(%0), %%mm1 \n\t"
"paddw 6*4*2(%0), %%mm0 \n\t" "paddw 6*4*2(%0), %%mm0 \n\t"
@ -398,10 +398,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image(vf,dmpi, pts); return vf_next_put_image(vf,dmpi, pts);

View File

@ -140,7 +140,7 @@ static int config(struct vf_instance_s* vf,
unsigned int flags, unsigned int outfmt){ unsigned int flags, unsigned int outfmt){
int sw, sh; int sw, sh;
//asm volatile("emms\n\t"); //__asm__ volatile("emms\n\t");
allocStuff(&vf->priv->luma, width, height); allocStuff(&vf->priv->luma, width, height);
getSubSampleFactors(&sw, &sh, outfmt); getSubSampleFactors(&sw, &sh, outfmt);

View File

@ -558,7 +558,7 @@ void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, Sw
#ifdef ARCH_X86 #ifdef ARCH_X86
if(gCpuCaps.hasMMX) if(gCpuCaps.hasMMX)
asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif #endif
if(firstTime) if(firstTime)
{ {

View File

@ -153,7 +153,7 @@ static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
threshold1= qp*((1<<4) - bias) - 1; threshold1= qp*((1<<4) - bias) - 1;
asm volatile( __asm__ volatile(
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
"movq " #src0 ", %%mm0 \n\t"\ "movq " #src0 ", %%mm0 \n\t"\
"movq " #src1 ", %%mm1 \n\t"\ "movq " #src1 ", %%mm1 \n\t"\
@ -221,7 +221,7 @@ static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
threshold1= qp*((1<<4) - bias) - 1; threshold1= qp*((1<<4) - bias) - 1;
asm volatile( __asm__ volatile(
#undef REQUANT_CORE #undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
"movq " #src0 ", %%mm0 \n\t"\ "movq " #src0 ", %%mm0 \n\t"\
@ -334,7 +334,7 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_
for(y=0; y<height; y++){ for(y=0; y<height; y++){
uint8_t *dst1= dst; uint8_t *dst1= dst;
int16_t *src1= src; int16_t *src1= src;
asm volatile( __asm__ volatile(
"movq (%3), %%mm3 \n\t" "movq (%3), %%mm3 \n\t"
"movq (%3), %%mm4 \n\t" "movq (%3), %%mm4 \n\t"
"movd %4, %%mm2 \n\t" "movd %4, %%mm2 \n\t"
@ -500,10 +500,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image(vf,dmpi, pts); return vf_next_put_image(vf,dmpi, pts);

View File

@ -58,7 +58,7 @@ static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int
s += ss; s += ss;
} }
for (i=h-1; i; i--) { for (i=h-1; i; i--) {
asm volatile( __asm__ volatile(
"1: \n\t" "1: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t" "movq (%%"REG_S"), %%mm0 \n\t"
"movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t" "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
@ -78,7 +78,7 @@ static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int
s += ss; s += ss;
} }
if (!up) fast_memcpy(d, s, w); if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory"); __asm__ volatile("emms \n\t" : : : "memory");
} }
#endif #endif
@ -94,7 +94,7 @@ static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int d
s += ss; s += ss;
} }
for (i=h-1; i; i--) { for (i=h-1; i; i--) {
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"2: \n\t" "2: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t" "movq (%%"REG_S"), %%mm0 \n\t"
@ -115,7 +115,7 @@ static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int d
s += ss; s += ss;
} }
if (!up) fast_memcpy(d, s, w); if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory"); __asm__ volatile("emms \n\t" : : : "memory");
} }
#endif #endif
@ -131,7 +131,7 @@ static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds
s += ss; s += ss;
} }
for (i=h-1; i; i--) { for (i=h-1; i; i--) {
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"3: \n\t" "3: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t" "movq (%%"REG_S"), %%mm0 \n\t"
@ -165,7 +165,7 @@ static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds
s += ss; s += ss;
} }
if (!up) fast_memcpy(d, s, w); if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory"); __asm__ volatile("emms \n\t" : : : "memory");
} }
static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up) static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
@ -185,7 +185,7 @@ static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int
d[j] = (s[j+ssd] + 3*s[j])>>2; d[j] = (s[j+ssd] + 3*s[j])>>2;
d += ds; s += ss; d += ds; s += ss;
for (i=h-3; i; i--) { for (i=h-3; i; i--) {
asm volatile( __asm__ volatile(
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
"movq (%%"REG_d"), %%mm4 \n\t" "movq (%%"REG_d"), %%mm4 \n\t"
"movq 8(%%"REG_d"), %%mm5 \n\t" "movq 8(%%"REG_d"), %%mm5 \n\t"
@ -245,7 +245,7 @@ static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int
d[j] = (s[j+ssd] + 3*s[j])>>2; d[j] = (s[j+ssd] + 3*s[j])>>2;
d += ds; s += ss; d += ds; s += ss;
if (!up) fast_memcpy(d, s, w); if (!up) fast_memcpy(d, s, w);
asm volatile("emms \n\t" : : : "memory"); __asm__ volatile("emms \n\t" : : : "memory");
} }
#endif #endif

View File

@ -194,11 +194,11 @@ static int put_image( struct vf_instance_s* vf, mp_image_t *mpi, double pts) {
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) if(gCpuCaps.hasMMX)
asm volatile ("emms\n\t"); __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) if(gCpuCaps.hasMMX2)
asm volatile ("sfence\n\t"); __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image( vf, dmpi, pts); return vf_next_put_image( vf, dmpi, pts);

View File

@ -292,10 +292,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t"); if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
#endif #endif
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t"); if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
#endif #endif
return vf_next_put_image(vf,dmpi, pts); return vf_next_put_image(vf,dmpi, pts);

View File

@ -142,7 +142,7 @@ static void filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, u
#define FILTER\ #define FILTER\
for(x=0; x<w; x+=4){\ for(x=0; x<w; x+=4){\
asm volatile(\ __asm__ volatile(\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\ LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\ LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
@ -259,7 +259,7 @@ static void filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, u
[pb1] "m"(pb_1),\ [pb1] "m"(pb_1),\
[mode] "g"(mode)\ [mode] "g"(mode)\
);\ );\
asm volatile("movd %%mm1, %0" :"=m"(*dst));\ __asm__ volatile("movd %%mm1, %0" :"=m"(*dst));\
dst += 4;\ dst += 4;\
prev+= 4;\ prev+= 4;\
cur += 4;\ cur += 4;\
@ -370,7 +370,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst[3], int dst_stride[3], int
} }
} }
#if defined(HAVE_MMX) && defined(NAMED_ASM_ARGS) #if defined(HAVE_MMX) && defined(NAMED_ASM_ARGS)
if(gCpuCaps.hasMMX2) asm volatile("emms \n\t" : : : "memory"); if(gCpuCaps.hasMMX2) __asm__ volatile("emms \n\t" : : : "memory");
#endif #endif
} }
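The FILTER macro above uses GCC's named asm operands ([cur], [prefs], [pb1], [mode] and so on), which is what the NAMED_ASM_ARGS guard tests for. A minimal sketch of the syntax, with an invented function and operand names:

static inline int plus_one(int v)
{
    int r;
    /* symbolic [in]/[out] names replace positional %0/%1 references */
    __asm__ ("lea 1(%[in]), %[out]"
             : [out] "=r" (r)
             : [in]  "r"  (v));
    return r;
}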

View File

@ -256,7 +256,7 @@ static void * RENAME(fast_memcpy)(void * to, const void * from, size_t len)
// printf(" %d %d\n", (int)from&1023, (int)to&1023); // printf(" %d %d\n", (int)from&1023, (int)to&1023);
// Pure Assembly because gcc is a bit unpredictable ;) // Pure Assembly because gcc is a bit unpredictable ;)
if(i>=BLOCK_SIZE/64) if(i>=BLOCK_SIZE/64)
asm volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"

View File

@ -33,7 +33,7 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
w=w>>1; w=w>>1;
#endif #endif
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
"pcmpeqb %%mm5, %%mm5\n\t" // F..F "pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm4\n\t" "movq %%mm5, %%mm4\n\t"
"movq %%mm5, %%mm7\n\t" "movq %%mm5, %%mm7\n\t"
@ -44,13 +44,13 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
for(y=0;y<h;y++){ for(y=0;y<h;y++){
register int x; register int x;
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
PREFETCHW" %0\n\t" PREFETCHW" %0\n\t"
PREFETCH" %1\n\t" PREFETCH" %1\n\t"
PREFETCH" %2\n\t" PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=8){ for(x=0;x<w;x+=8){
asm volatile( __asm__ volatile(
"movl %1, %%eax\n\t" "movl %1, %%eax\n\t"
"orl 4%1, %%eax\n\t" "orl 4%1, %%eax\n\t"
" jz 1f\n\t" " jz 1f\n\t"
@ -92,7 +92,7 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
dstbase+=dststride; dstbase+=dststride;
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
return; return;
} }
@ -103,7 +103,7 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
w=w>>1; w=w>>1;
#endif #endif
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7\n\t" "pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm5, %%mm5\n\t" // F..F "pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm6\n\t" "movq %%mm5, %%mm6\n\t"
@ -115,13 +115,13 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
for(y=0;y<h;y++){ for(y=0;y<h;y++){
register int x; register int x;
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
PREFETCHW" %0\n\t" PREFETCHW" %0\n\t"
PREFETCH" %1\n\t" PREFETCH" %1\n\t"
PREFETCH" %2\n\t" PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src)); ::"m"(*dstbase),"m"(*srca),"m"(*src));
for(x=0;x<w;x+=4){ for(x=0;x<w;x+=4){
asm volatile( __asm__ volatile(
"movl %1, %%eax\n\t" "movl %1, %%eax\n\t"
"orl %%eax, %%eax\n\t" "orl %%eax, %%eax\n\t"
" jz 1f\n\t" " jz 1f\n\t"
@ -164,7 +164,7 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
dstbase+=dststride; dstbase+=dststride;
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
return; return;
} }
@ -196,7 +196,7 @@ static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, u
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
int y; int y;
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7\n\t" "pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F "pcmpeqb %%mm6, %%mm6\n\t" // F..F
::); ::);
@ -206,14 +206,14 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
register int x; register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX)) #if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( __asm__ volatile(
PREFETCHW" %0\n\t" PREFETCHW" %0\n\t"
PREFETCH" %1\n\t" PREFETCH" %1\n\t"
PREFETCH" %2\n\t" PREFETCH" %2\n\t"
::"m"(*dst),"m"(*srca),"m"(*src):"memory"); ::"m"(*dst),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=2){ for(x=0;x<w;x+=2){
if(srca[x] || srca[x+1]) if(srca[x] || srca[x+1])
asm volatile( __asm__ volatile(
PREFETCHW" 32%0\n\t" PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t" PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t" PREFETCH" 32%2\n\t"
@ -250,7 +250,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
#else /* HAVE_MMX */ #else /* HAVE_MMX */
for(x=0;x<w;x++){ for(x=0;x<w;x++){
if(srca[x]){ if(srca[x]){
asm volatile( __asm__ volatile(
"movzbl (%0), %%ecx\n\t" "movzbl (%0), %%ecx\n\t"
"movzbl 1(%0), %%eax\n\t" "movzbl 1(%0), %%eax\n\t"
@ -296,7 +296,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
dstbase+=dststride; dstbase+=dststride;
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
return; return;
} }
@ -308,12 +308,12 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#endif #endif
#ifdef HAVE_MMX #ifdef HAVE_MMX
#ifdef HAVE_3DNOW #ifdef HAVE_3DNOW
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7\n\t" "pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F "pcmpeqb %%mm6, %%mm6\n\t" // F..F
::); ::);
#else /* HAVE_3DNOW */ #else /* HAVE_3DNOW */
asm volatile( __asm__ volatile(
"pxor %%mm7, %%mm7\n\t" "pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm5, %%mm5\n\t" // F..F "pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm4\n\t" "movq %%mm5, %%mm4\n\t"
@ -327,14 +327,14 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX)) #if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX #ifdef HAVE_MMX
#ifdef HAVE_3DNOW #ifdef HAVE_3DNOW
asm volatile( __asm__ volatile(
PREFETCHW" %0\n\t" PREFETCHW" %0\n\t"
PREFETCH" %1\n\t" PREFETCH" %1\n\t"
PREFETCH" %2\n\t" PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=2){ for(x=0;x<w;x+=2){
if(srca[x] || srca[x+1]) if(srca[x] || srca[x+1])
asm volatile( __asm__ volatile(
PREFETCHW" 32%0\n\t" PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t" PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t" PREFETCH" 32%2\n\t"
@ -362,13 +362,13 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x])); :: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
} }
#else //this is faster on Intel CPUs #else //this is faster on Intel CPUs
asm volatile( __asm__ volatile(
PREFETCHW" %0\n\t" PREFETCHW" %0\n\t"
PREFETCH" %1\n\t" PREFETCH" %1\n\t"
PREFETCH" %2\n\t" PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory"); ::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=4){ for(x=0;x<w;x+=4){
asm volatile( __asm__ volatile(
"movl %1, %%eax\n\t" "movl %1, %%eax\n\t"
"orl %%eax, %%eax\n\t" "orl %%eax, %%eax\n\t"
" jz 1f\n\t" " jz 1f\n\t"
@ -417,7 +417,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#else /* HAVE_MMX */ #else /* HAVE_MMX */
for(x=0;x<w;x++){ for(x=0;x<w;x++){
if(srca[x]){ if(srca[x]){
asm volatile( __asm__ volatile(
"movzbl (%0), %%ecx\n\t" "movzbl (%0), %%ecx\n\t"
"movzbl 1(%0), %%eax\n\t" "movzbl 1(%0), %%eax\n\t"
"movzbl 2(%0), %%edx\n\t" "movzbl 2(%0), %%edx\n\t"
@ -461,7 +461,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
dstbase+=dststride; dstbase+=dststride;
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
return; return;
} }

View File

@ -4569,7 +4569,7 @@ static INT WINAPI expMessageBoxA(HWND hWnd, LPCSTR text, LPCSTR title, UINT type
void exp_EH_prolog(void *dest); void exp_EH_prolog(void *dest);
//! just a dummy function that acts as a container for the asm section //! just a dummy function that acts as a container for the asm section
void exp_EH_prolog_dummy(void) { void exp_EH_prolog_dummy(void) {
asm volatile ( __asm__ volatile (
// take care, this "function" may not change flags or // take care, this "function" may not change flags or
// registers besides eax (which is also why we can't use // registers besides eax (which is also why we can't use
// exp_EH_prolog_dummy directly) // exp_EH_prolog_dummy directly)
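What the comment describes is a common loader trick: the dummy C function exists only so the compiler emits a text-section container, and the callable symbol is the label defined inside the asm block. A hedged sketch of that pattern with invented names:

void my_entry(void);            /* the real, callable symbol */
void my_entry_container(void)   /* never called directly */
{
    __asm__ volatile (
        ".globl my_entry\n"
        "my_entry:\n\t"
        "ret\n\t"
    );
}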

View File

@ -693,7 +693,7 @@ typedef HANDLE *PHANDLE;
#else /* __GNUC__ */ #else /* __GNUC__ */
# define ASM_GLOBAL_FUNC(name,code) \ # define ASM_GLOBAL_FUNC(name,code) \
void asm_dummy_##name(void) { \ void asm_dummy_##name(void) { \
asm( ".align 4\n\t" \ __asm__( ".align 4\n\t" \
".globl " ASM_NAME(#name) "\n\t" \ ".globl " ASM_NAME(#name) "\n\t" \
".type " ASM_NAME(#name) ",@function\n" \ ".type " ASM_NAME(#name) ",@function\n" \
ASM_NAME(#name) ":\n\t" \ ASM_NAME(#name) ":\n\t" \
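Assuming the truncated macro finishes by appending its second argument after the label (which its two parameters suggest), a hypothetical use would look like this; the symbol name and the assembly body are invented:

/* defines a global function return_first_arg() written as one asm string */
ASM_GLOBAL_FUNC(return_first_arg,
                "movl 4(%esp), %eax\n\t"
                "ret")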

View File

@ -18,7 +18,7 @@ static float attribute_used plus_1f = 1.0;
void dct64_MMX_3dnow(short *a,short *b,real *c) void dct64_MMX_3dnow(short *a,short *b,real *c)
{ {
char tmp[256]; char tmp[256];
__asm __volatile( __asm__ __volatile(
" movl %2,%%eax\n\t" " movl %2,%%eax\n\t"
" leal 128+%3,%%edx\n\t" " leal 128+%3,%%edx\n\t"

View File

@ -18,7 +18,7 @@ static float attribute_used plus_1f = 1.0;
void dct64_MMX_3dnowex(short *a,short *b,real *c) void dct64_MMX_3dnowex(short *a,short *b,real *c)
{ {
char tmp[256]; char tmp[256];
__asm __volatile( __asm__ __volatile(
" movl %2,%%eax\n\t" " movl %2,%%eax\n\t"
" leal 128+%3,%%edx\n\t" " leal 128+%3,%%edx\n\t"

View File

@ -9,7 +9,7 @@
void dct64_MMX(short *a,short *b,real *c) void dct64_MMX(short *a,short *b,real *c)
{ {
char tmp[256]; char tmp[256];
__asm __volatile( __asm__ __volatile(
" movl %2,%%eax\n\t" " movl %2,%%eax\n\t"
/* Phase 1*/ /* Phase 1*/
" flds (%%eax)\n\t" " flds (%%eax)\n\t"

View File

@ -32,7 +32,7 @@ void dct64_sse(short *out0,short *out1,real *c)
for (i = 0; i < 0x20 / 2; i += 4) for (i = 0; i < 0x20 / 2; i += 4)
{ {
asm( __asm__(
"movaps %2, %%xmm3\n\t" "movaps %2, %%xmm3\n\t"
"shufps $27, %%xmm3, %%xmm3\n\t" "shufps $27, %%xmm3, %%xmm3\n\t"
"movaps %3, %%xmm1\n\t" "movaps %3, %%xmm1\n\t"
@ -57,7 +57,7 @@ void dct64_sse(short *out0,short *out1,real *c)
for (i = 0; i < 0x20; i += 0x10) for (i = 0; i < 0x20; i += 0x10)
{ {
asm( __asm__(
"movaps %4, %%xmm1\n\t" "movaps %4, %%xmm1\n\t"
"movaps %5, %%xmm3\n\t" "movaps %5, %%xmm3\n\t"
"movaps %6, %%xmm4\n\t" "movaps %6, %%xmm4\n\t"
@ -86,7 +86,7 @@ void dct64_sse(short *out0,short *out1,real *c)
{ {
real *costab = costab_mmx + 16; real *costab = costab_mmx + 16;
asm( __asm__(
"movaps %4, %%xmm0\n\t" "movaps %4, %%xmm0\n\t"
"movaps %5, %%xmm1\n\t" "movaps %5, %%xmm1\n\t"
"movaps %8, %%xmm4\n\t" "movaps %8, %%xmm4\n\t"
@ -116,7 +116,7 @@ void dct64_sse(short *out0,short *out1,real *c)
real *costab = costab_mmx + 24; real *costab = costab_mmx + 24;
int i; int i;
asm( __asm__(
"movaps %0, %%xmm0\n\t" "movaps %0, %%xmm0\n\t"
"shufps $27, %%xmm0, %%xmm0\n\t" "shufps $27, %%xmm0, %%xmm0\n\t"
"movaps %1, %%xmm5\n\t" "movaps %1, %%xmm5\n\t"
@ -127,7 +127,7 @@ void dct64_sse(short *out0,short *out1,real *c)
for (i = 0; i < 0x20; i += 8) for (i = 0; i < 0x20; i += 8)
{ {
asm( __asm__(
"movaps %2, %%xmm2\n\t" "movaps %2, %%xmm2\n\t"
"movaps %3, %%xmm3\n\t" "movaps %3, %%xmm3\n\t"
"movaps %%xmm2, %%xmm4\n\t" "movaps %%xmm2, %%xmm4\n\t"
@ -150,7 +150,7 @@ void dct64_sse(short *out0,short *out1,real *c)
{ {
int i; int i;
asm( __asm__(
"movss %0, %%xmm1\n\t" "movss %0, %%xmm1\n\t"
"movss %1, %%xmm0\n\t" "movss %1, %%xmm0\n\t"
"movaps %%xmm1, %%xmm3\n\t" "movaps %%xmm1, %%xmm3\n\t"
@ -166,7 +166,7 @@ void dct64_sse(short *out0,short *out1,real *c)
for (i = 0; i < 0x20; i += 8) for (i = 0; i < 0x20; i += 8)
{ {
asm( __asm__(
"movaps %2, %%xmm3\n\t" "movaps %2, %%xmm3\n\t"
"movaps %%xmm3, %%xmm4\n\t" "movaps %%xmm3, %%xmm4\n\t"
"shufps $20, %%xmm4, %%xmm4\n\t" "shufps $20, %%xmm4, %%xmm4\n\t"
@ -190,7 +190,7 @@ void dct64_sse(short *out0,short *out1,real *c)
{ {
int i; int i;
asm( __asm__(
"movss %0, %%xmm0\n\t" "movss %0, %%xmm0\n\t"
"movaps %%xmm1, %%xmm2\n\t" "movaps %%xmm1, %%xmm2\n\t"
"movaps %%xmm0, %%xmm7\n\t" "movaps %%xmm0, %%xmm7\n\t"
@ -204,7 +204,7 @@ void dct64_sse(short *out0,short *out1,real *c)
for (i = 0x8; i < 0x20; i += 8) for (i = 0x8; i < 0x20; i += 8)
{ {
asm volatile ( __asm__ volatile (
"movaps %2, %%xmm1\n\t" "movaps %2, %%xmm1\n\t"
"movaps %%xmm1, %%xmm3\n\t" "movaps %%xmm1, %%xmm3\n\t"
"shufps $224, %%xmm3, %%xmm3\n\t" "shufps $224, %%xmm3, %%xmm3\n\t"
@ -285,7 +285,7 @@ void dct64_sse(short *out0,short *out1,real *c)
To do saturation efficiently in x86 we can use fist(t)(p), To do saturation efficiently in x86 we can use fist(t)(p),
pf2iw, or packssdw. We use fist(p) here. pf2iw, or packssdw. We use fist(p) here.
*/ */
asm( __asm__(
"flds %0\n\t" "flds %0\n\t"
"flds (%2)\n\t" "flds (%2)\n\t"
"fadds 4(%2)\n\t" "fadds 4(%2)\n\t"

View File

@ -41,7 +41,7 @@ int synth_1to1_pent(real *bandPtr, int channel, short *samples)
{ {
real tmp[3]; real tmp[3];
register int retval; register int retval;
__asm __volatile( __asm__ __volatile(
" movl %%ebp,"MANGLE(saved_ebp)"\n\t" " movl %%ebp,"MANGLE(saved_ebp)"\n\t"
" movl %1,%%eax\n\t"/*bandPtr*/ " movl %1,%%eax\n\t"/*bandPtr*/
" movl %3,%%esi\n\t" " movl %3,%%esi\n\t"

View File

@ -217,7 +217,7 @@ int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
dct64_MMX_func(a, b, bandPtr); dct64_MMX_func(a, b, bandPtr);
window = mp3lib_decwins + 16 - bo1; window = mp3lib_decwins + 16 - bo1;
//printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1); //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
__asm __volatile( __asm__ __volatile(
ASMALIGN(4) ASMALIGN(4)
".L03:\n\t" ".L03:\n\t"
"movq (%1),%%mm0\n\t" "movq (%1),%%mm0\n\t"

View File

@ -99,11 +99,6 @@ void outl(U16_t, U32_t);
#else /* not _MINIX and _ACK */ #else /* not _MINIX and _ACK */
# if defined(__STDC__) && (__STDC__ == 1)
# ifndef NCR
# define asm __asm
# endif
# endif
# ifdef SVR4 # ifdef SVR4
# include <sys/types.h> # include <sys/types.h>
# ifndef __USLC__ # ifndef __USLC__
@ -115,8 +110,8 @@ void outl(U16_t, U32_t);
#else #else
# include "../common/scoasm.h" # include "../common/scoasm.h"
#endif #endif
#define intr_disable() asm("cli") #define intr_disable() __asm__("cli")
#define intr_enable() asm("sti") #define intr_enable() __asm__("sti")
#endif /* _MINIX and _ACK */ #endif /* _MINIX and _ACK */
#endif /* __GNUC__ */ #endif /* __GNUC__ */

View File

@ -62,7 +62,7 @@ static __inline__ int enable_os_io(void)
/* Calling callgate with function 13 sets IOPL for the program */ /* Calling callgate with function 13 sets IOPL for the program */
asm volatile ("movl $13,%%ebx;.byte 0xff,0x1d;.long _callgate" __asm__ volatile ("movl $13,%%ebx;.byte 0xff,0x1d;.long _callgate"
: /*no outputs */ : /*no outputs */
: /*no inputs */ : /*no inputs */
: "eax","ebx","ecx","edx","cc"); : "eax","ebx","ecx","edx","cc");