Mirror of https://github.com/mpv-player/mpv, synced 2025-03-20 18:28:01 +00:00.
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
Neither variant is valid C99 syntax, but __asm__ is the most portable variant.

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@27788 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent 629db77d89
commit 6b52a2e974
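For context, a minimal standalone sketch (mine, not part of this commit) of the portability point the message makes: GCC accepts the plain `asm` keyword only as a GNU extension, so a strict build such as `gcc -m32 -std=c99 -pedantic-errors` rejects it, while the reserved-identifier spelling `__asm__` (and `__volatile__`) is accepted in every language mode. The function mirrors the read_tsc() helper touched below; the "=A" output constraint is the 32-bit x86 idiom binding edx:eax, hence the assumed -m32 flag.

    /* portable_rdtsc.c -- illustrative sketch only; the file name and
     * build line are assumptions, not from the commit.
     * Builds with: gcc -m32 -std=c99 -pedantic-errors portable_rdtsc.c
     * Spelling the statement with plain `asm` fails in this mode. */
    #include <stdio.h>

    static inline unsigned long long read_tsc(void)
    {
        unsigned long long retval;
        /* __asm__/__volatile__ are valid even in strict ISO C modes */
        __asm__ __volatile__ ("rdtsc" : "=A" (retval) : : "memory");
        return retval;
    }

    int main(void)
    {
        printf("%llu\n", read_tsc());
        return 0;
    }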
@@ -86,7 +86,7 @@ static unsigned int GetTimer(){
 static inline unsigned long long int read_tsc( void )
 {
     unsigned long long int retval;
-    __asm __volatile ("rdtsc":"=A"(retval)::"memory");
+    __asm__ __volatile ("rdtsc":"=A"(retval)::"memory");
     return retval;
 }
configure (6 changed lines, vendored)
@@ -2124,8 +2124,8 @@ EOF
 cat > $TMPC << EOF
 int main(void) {
     unsigned long ver, mask;
-    asm ("implver %0" : "=r" (ver));
-    asm ("amask %1, %0" : "=r" (mask) : "r" (-1));
+    __asm__ ("implver %0" : "=r" (ver));
+    __asm__ ("amask %1, %0" : "=r" (mask) : "r" (-1));
     printf("%ld-%x\n", ver, ~mask);
     return 0;
 }
@@ -2374,7 +2374,7 @@ echocheck ".align is a power of two"
 if test "$_asmalign_pot" = auto ; then
   _asmalign_pot=no
   cat > $TMPC << EOF
-int main(void) { asm (".align 3"); return 0; }
+int main(void) { __asm__ (".align 3"); return 0; }
 EOF
   cc_check && _asmalign_pot=yes
 fi
cpudetect.c (14 changed lines)
@@ -85,14 +85,14 @@ static void
 do_cpuid(unsigned int ax, unsigned int *p)
 {
 #if 0
-    __asm __volatile(
+    __asm__ __volatile(
     "cpuid;"
     : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
     : "0" (ax)
     );
 #else
 // code from libavcodec:
-    __asm __volatile
+    __asm__ __volatile__
     ("mov %%"REG_b", %%"REG_S"\n\t"
     "cpuid\n\t"
     "xchg %%"REG_b", %%"REG_S
@@ -400,7 +400,7 @@ static void check_os_katmai_support( void )
    if ( gCpuCaps.hasSSE ) {
       mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
       exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-      __asm __volatile ("xorps %xmm0, %xmm0");
+      __asm__ __volatile ("xorps %xmm0, %xmm0");
       SetUnhandledExceptionFilter(exc_fil);
       mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
    }
@@ -409,7 +409,7 @@ static void check_os_katmai_support( void )
    if ( gCpuCaps.hasSSE ) {
      mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );
      DosSetExceptionHandler( &RegRec );
-     __asm __volatile ("xorps %xmm0, %xmm0");
+     __asm__ __volatile ("xorps %xmm0, %xmm0");
      DosUnsetExceptionHandler( &RegRec );
      mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
    }
@@ -432,8 +432,8 @@ static void check_os_katmai_support( void )
    if ( gCpuCaps.hasSSE ) {
      mp_msg(MSGT_CPUDETECT,MSGL_V, "Testing OS support for SSE... " );

-//   __asm __volatile ("xorps %%xmm0, %%xmm0");
-     __asm __volatile ("xorps %xmm0, %xmm0");
+//   __asm__ __volatile ("xorps %%xmm0, %%xmm0");
+     __asm__ __volatile ("xorps %xmm0, %xmm0");

      mp_msg(MSGT_CPUDETECT,MSGL_V, gCpuCaps.hasSSE ? "yes.\n" : "no!\n" );
    }
@@ -532,7 +532,7 @@ void GetCpuCaps( CpuCaps *caps)
     } else {
         canjump = 1;

-        asm volatile ("mtspr 256, %0\n\t"
+        __asm__ volatile ("mtspr 256, %0\n\t"
                       "vand %%v0, %%v0, %%v0"
                       :
                       : "r" (-1));
@@ -49,9 +49,9 @@ cpuid(int func) {
     cpuid_regs_t regs;
 #define CPUID ".byte 0x0f, 0xa2; "
 #ifdef __x86_64__
-    asm("mov %%rbx, %%rsi\n\t"
+    __asm__("mov %%rbx, %%rsi\n\t"
 #else
-    asm("mov %%ebx, %%esi\n\t"
+    __asm__("mov %%ebx, %%esi\n\t"
 #endif
         CPUID"\n\t"
 #ifdef __x86_64__
@@ -70,7 +70,7 @@ rdtsc(void)
 {
     uint64_t i;
 #define RDTSC ".byte 0x0f, 0x31; "
-    asm volatile (RDTSC : "=A"(i) : );
+    __asm__ volatile (RDTSC : "=A"(i) : );
     return i;
 }
@@ -129,7 +129,7 @@ static int swap_fourcc __initdata = 0;
 static inline double FastSin(double x)
 {
     register double res;
-    __asm __volatile("fsin":"=t"(res):"0"(x));
+    __asm__ __volatile("fsin":"=t"(res):"0"(x));
     return res;
 }
 #undef sin
@@ -138,7 +138,7 @@ static inline double FastSin(double x)
 static inline double FastCos(double x)
 {
     register double res;
-    __asm __volatile("fcos":"=t"(res):"0"(x));
+    __asm__ __volatile("fcos":"=t"(res):"0"(x));
     return res;
 }
 #undef cos
@@ -689,7 +689,7 @@ void upmix_C (sample_t * samples, int acmod, int output)
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %2, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -712,7 +712,7 @@ static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
 
 static void mix3to1_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -733,7 +733,7 @@ static void mix3to1_SSE (sample_t * samples, sample_t bias)
 
 static void mix4to1_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -755,7 +755,7 @@ static void mix4to1_SSE (sample_t * samples, sample_t bias)
 
 static void mix5to1_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -778,7 +778,7 @@ static void mix5to1_SSE (sample_t * samples, sample_t bias)
 
 static void mix3to2_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -801,7 +801,7 @@ static void mix3to2_SSE (sample_t * samples, sample_t bias)
 
 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %2, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -824,7 +824,7 @@ static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
 
 static void mix21toS_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -848,7 +848,7 @@ static void mix21toS_SSE (sample_t * samples, sample_t bias)
 
 static void mix31to2_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -872,7 +872,7 @@ static void mix31to2_SSE (sample_t * samples, sample_t bias)
 
 static void mix31toS_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -898,7 +898,7 @@ static void mix31toS_SSE (sample_t * samples, sample_t bias)
 
 static void mix22toS_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -923,7 +923,7 @@ static void mix22toS_SSE (sample_t * samples, sample_t bias)
 
 static void mix32to2_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -947,7 +947,7 @@ static void mix32to2_SSE (sample_t * samples, sample_t bias)
 
 static void mix32toS_SSE (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %1, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -974,7 +974,7 @@ static void mix32toS_SSE (sample_t * samples, sample_t bias)
 
 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movlps %2, %%xmm7 \n\t"
    "shufps $0x00, %%xmm7, %%xmm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -997,7 +997,7 @@ static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
 
 static void zero_MMX(sample_t * samples)
 {
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "pxor %%mm0, %%mm0 \n\t"
    ASMALIGN(4)
@@ -1223,7 +1223,7 @@ static void upmix_MMX (sample_t * samples, int acmod, int output)
 
 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %2, %%mm7 \n\t"
    "punpckldq %2, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1254,7 +1254,7 @@ static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
 
 static void mix3to1_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1281,7 +1281,7 @@ static void mix3to1_3dnow (sample_t * samples, sample_t bias)
 
 static void mix4to1_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1310,7 +1310,7 @@ static void mix4to1_3dnow (sample_t * samples, sample_t bias)
 
 static void mix5to1_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1341,7 +1341,7 @@ static void mix5to1_3dnow (sample_t * samples, sample_t bias)
 
 static void mix3to2_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1372,7 +1372,7 @@ static void mix3to2_3dnow (sample_t * samples, sample_t bias)
 
 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %2, %%mm7 \n\t"
    "punpckldq %2, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1403,7 +1403,7 @@ static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
 
 static void mix21toS_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1436,7 +1436,7 @@ static void mix21toS_3dnow (sample_t * samples, sample_t bias)
 
 static void mix31to2_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1469,7 +1469,7 @@ static void mix31to2_3dnow (sample_t * samples, sample_t bias)
 
 static void mix31toS_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1506,7 +1506,7 @@ static void mix31toS_3dnow (sample_t * samples, sample_t bias)
 
 static void mix22toS_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1541,7 +1541,7 @@ static void mix22toS_3dnow (sample_t * samples, sample_t bias)
 
 static void mix32to2_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %1, %%mm7 \n\t"
    "punpckldq %1, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1575,7 +1575,7 @@ static void mix32to2_3dnow (sample_t * samples, sample_t bias)
 /* todo: should be optimized better */
 static void mix32toS_3dnow (sample_t * samples, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    ASMALIGN(4)
    "1: \n\t"
@@ -1614,7 +1614,7 @@ static void mix32toS_3dnow (sample_t * samples, sample_t bias)
 
 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
 {
-    asm volatile(
+    __asm__ volatile(
    "movd %2, %%mm7 \n\t"
    "punpckldq %2, %%mm7 \n\t"
    "mov $-1024, %%"REG_S" \n\t"
@@ -1782,7 +1782,7 @@ static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t b
     memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
     break;
     }
-    __asm __volatile("femms":::"memory");
+    __asm__ volatile("femms":::"memory");
 }
 
 #endif // ARCH_X86 || ARCH_X86_64
@@ -750,7 +750,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
 
     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
     /* Bit reversed shuffling */
-    asm volatile(
+    __asm__ volatile(
    "xor %%"REG_S", %%"REG_S" \n\t"
    "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
    "mov $1008, %%"REG_D" \n\t"
@@ -810,7 +810,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
 
     /* 1. iteration */
     // Note w[0][0]={1,0}
-    asm volatile(
+    __asm__ volatile(
    "xorps %%xmm1, %%xmm1 \n\t"
    "xorps %%xmm2, %%xmm2 \n\t"
    "mov %0, %%"REG_S" \n\t"
@@ -832,7 +832,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
 
     /* 2. iteration */
     // Note w[1]={{1,0}, {0,-1}}
-    asm volatile(
+    __asm__ volatile(
    "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
    "mov %0, %%"REG_S" \n\t"
    ASMALIGN(4)
@@ -860,7 +860,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
  Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
  Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
 */
-    asm volatile(
+    __asm__ volatile(
    "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
    "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
    "xorps %%xmm5, %%xmm5 \n\t"
@@ -905,7 +905,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     two_m_plus_one = two_m<<1;
     two_m_plus_one_shl3 = (two_m_plus_one<<3);
     buf_offset = buf+128;
-    asm volatile(
+    __asm__ volatile(
    "mov %0, %%"REG_S" \n\t"
    ASMALIGN(4)
    "1: \n\t"
@@ -937,7 +937,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     }
 
     /* Post IFFT complex multiply plus IFFT complex conjugate*/
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    ASMALIGN(4)
    "1: \n\t"
@@ -960,7 +960,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     window_ptr = a52_imdct_window;
 
     /* Window and convert to real valued signal */
-    asm volatile(
+    __asm__ volatile(
    "xor %%"REG_D", %%"REG_D" \n\t" // 0
    "xor %%"REG_S", %%"REG_S" \n\t" // 0
    "movss %3, %%xmm2 \n\t" // bias
@@ -987,7 +987,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     delay_ptr+=128;
 // window_ptr+=128;
 
-    asm volatile(
+    __asm__ volatile(
    "mov $1024, %%"REG_D" \n\t" // 512
    "xor %%"REG_S", %%"REG_S" \n\t" // 0
    "movss %3, %%xmm2 \n\t" // bias
@@ -1016,7 +1016,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     /* The trailing edge of the window goes into the delay line */
     delay_ptr = delay;
 
-    asm volatile(
+    __asm__ volatile(
    "xor %%"REG_D", %%"REG_D" \n\t" // 0
    "xor %%"REG_S", %%"REG_S" \n\t" // 0
    ASMALIGN(4)
@@ -1038,7 +1038,7 @@ imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
     delay_ptr+=128;
 // window_ptr-=128;
 
-    asm volatile(
+    __asm__ volatile(
    "mov $1024, %%"REG_D" \n\t" // 1024
    "xor %%"REG_S", %%"REG_S" \n\t" // 0
    ASMALIGN(4)
@@ -431,7 +431,7 @@ imdct_do_512_3dnow
 */
 
     FFT_128P_3DNOW (&buf[0]);
-//  asm volatile ("femms \n\t":::"memory");
+//  __asm__ volatile ("femms \n\t":::"memory");
 
     /* Post IFFT complex multiply plus IFFT complex conjugate*/
 #if 1
@@ -489,7 +489,7 @@ imdct_do_512_3dnow
 
     /* Window and convert to real valued signal */
 #if 1
-    asm volatile (
+    __asm__ volatile (
    "movd (%0), %%mm3 \n\t"
    "punpckldq %%mm3, %%mm3 \n\t"
    :: "r" (&bias)
@@ -1,6 +1,6 @@
 --- include/a52.h	2006-06-12 15:04:57.000000000 +0200
 +++ liba52/a52.h	2006-06-05 02:23:02.000000000 +0200
-@@ -59,4 +63,9 @@
+@@ -63,4 +63,9 @@
 int a52_block (a52_state_t * state);
 void a52_free (a52_state_t * state);
 
@@ -12,7 +12,7 @@
 #endif /* A52_H */
 --- liba52/a52_internal.h	2006-06-12 15:05:07.000000000 +0200
 +++ liba52/a52_internal.h	2006-06-05 02:23:02.000000000 +0200
-@@ -103,18 +107,34 @@
+@@ -107,18 +107,34 @@
 #define DELTA_BIT_NONE (2)
 #define DELTA_BIT_RESERVED (3)
 
@@ -52,7 +52,7 @@
 +void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
 --- liba52/bitstream.c	2006-06-12 15:05:07.000000000 +0200
 +++ liba52/bitstream.c	2006-06-05 02:23:02.000000000 +0200
-@@ -31,6 +35,10 @@
+@@ -35,6 +35,10 @@
 
 #define BUFFER_SIZE 4096
 
@@ -63,7 +63,7 @@
 void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
 {
     int align;
-@@ -38,6 +46,9 @@
+@@ -42,6 +46,9 @@
     align = (long)buf & 3;
     state->buffer_start = (uint32_t *) (buf - align);
     state->bits_left = 0;
@@ -75,7 +75,7 @@
 
 --- liba52/bitstream.h	2006-06-12 15:05:07.000000000 +0200
 +++ liba52/bitstream.h	2006-06-05 02:23:02.000000000 +0200
-@@ -21,6 +25,42 @@
+@@ -25,6 +25,42 @@
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 
@@ -118,7 +118,7 @@
 /* (stolen from the kernel) */
 #ifdef WORDS_BIGENDIAN
 
-@@ -28,7 +74,7 @@
+@@ -32,7 +74,7 @@
 
 #else
 
@@ -127,7 +127,7 @@
 
 # define swab32(x) __i386_swab32(x)
 static inline const uint32_t __i386_swab32(uint32_t x)
-@@ -39,19 +85,34 @@
+@@ -43,19 +85,34 @@
 
 # else
 
@@ -166,7 +166,7 @@
     uint32_t result;
 
     if (num_bits < state->bits_left) {
-@@ -61,10 +122,29 @@
+@@ -65,10 +122,29 @@
     }
 
     return a52_bitstream_get_bh (state, num_bits);
@@ -196,7 +196,7 @@
     int32_t result;
 
     if (num_bits < state->bits_left) {
-@@ -74,4 +154,5 @@
+@@ -78,4 +154,5 @@
     }
 
     return a52_bitstream_get_bh_2 (state, num_bits);
@@ -204,7 +204,7 @@
 }
 --- liba52/downmix.c	2006-06-12 15:17:53.000000000 +0200
 +++ liba52/downmix.c	2006-06-05 02:23:02.000000000 +0200
-@@ -19,18 +23,46 @@
+@@ -23,18 +23,46 @@
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -251,7 +251,7 @@
 int a52_downmix_init (int input, int flags, sample_t * level,
 		      sample_t clev, sample_t slev)
 {
-@@ -447,7 +479,7 @@
+@@ -451,7 +479,7 @@
 	    samples[i] = 0;
     }
 
@@ -260,7 +260,7 @@
 		    sample_t clev, sample_t slev)
 {
     switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-@@ -559,7 +591,7 @@
+@@ -563,7 +591,7 @@
 	break;
 
     case CONVERT (A52_3F2R, A52_2F1R):
@@ -269,7 +269,7 @@
 	move2to1 (samples + 768, samples + 512, bias);
 	break;
 
-@@ -583,12 +615,12 @@
+@@ -587,12 +615,12 @@
 	break;
 
     case CONVERT (A52_3F1R, A52_3F2R):
@@ -284,7 +284,7 @@
 {
     switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
 
-@@ -653,3 +685,1104 @@
+@@ -657,3 +685,1104 @@
 	goto mix_31to21;
     }
 }
@@ -292,7 +292,7 @@
 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 +static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %2, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -315,7 +315,7 @@
 +
 +static void mix3to1_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -336,7 +336,7 @@
 +
 +static void mix4to1_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -358,7 +358,7 @@
 +
 +static void mix5to1_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -381,7 +381,7 @@
 +
 +static void mix3to2_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -404,7 +404,7 @@
 +
 +static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %2, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -427,7 +427,7 @@
 +
 +static void mix21toS_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -451,7 +451,7 @@
 +
 +static void mix31to2_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -475,7 +475,7 @@
 +
 +static void mix31toS_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -501,7 +501,7 @@
 +
 +static void mix22toS_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -526,7 +526,7 @@
 +
 +static void mix32to2_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -550,7 +550,7 @@
 +
 +static void mix32toS_SSE (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %1, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -577,7 +577,7 @@
 +
 +static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movlps %2, %%xmm7 \n\t"
 +   "shufps $0x00, %%xmm7, %%xmm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -600,7 +600,7 @@
 +
 +static void zero_MMX(sample_t * samples)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "mov $-1024, %%"REG_S" \n\t"
 +   "pxor %%mm0, %%mm0 \n\t"
 +   ASMALIGN(4)
@@ -826,7 +826,7 @@
 +
 +static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %2, %%mm7 \n\t"
 +   "punpckldq %2, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -857,7 +857,7 @@
 +
 +static void mix3to1_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -884,7 +884,7 @@
 +
 +static void mix4to1_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -913,7 +913,7 @@
 +
 +static void mix5to1_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -944,7 +944,7 @@
 +
 +static void mix3to2_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -975,7 +975,7 @@
 +
 +static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %2, %%mm7 \n\t"
 +   "punpckldq %2, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1006,7 +1006,7 @@
 +
 +static void mix21toS_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1039,7 +1039,7 @@
 +
 +static void mix31to2_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1072,7 +1072,7 @@
 +
 +static void mix31toS_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1109,7 +1109,7 @@
 +
 +static void mix22toS_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1144,7 +1144,7 @@
 +
 +static void mix32to2_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %1, %%mm7 \n\t"
 +   "punpckldq %1, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1178,7 +1178,7 @@
 +/* todo: should be optimized better */
 +static void mix32toS_3dnow (sample_t * samples, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "mov $-1024, %%"REG_S" \n\t"
 +   ASMALIGN(4)
 +   "1: \n\t"
@@ -1217,7 +1217,7 @@
 +
 +static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
 +{
-+    asm volatile(
++    __asm__ volatile(
 +   "movd %2, %%mm7 \n\t"
 +   "punpckldq %2, %%mm7 \n\t"
 +   "mov $-1024, %%"REG_S" \n\t"
@@ -1385,13 +1385,13 @@
 +	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
 +	break;
 +    }
-+    __asm __volatile("femms":::"memory");
++    __asm__ volatile("femms":::"memory");
 +}
 +
 +#endif // ARCH_X86 || ARCH_X86_64
 --- liba52/imdct.c	2008-02-19 00:18:33.000000000 +0100
 +++ liba52/imdct.c	2008-02-19 00:16:40.000000000 +0100
-@@ -22,6 +26,11 @@
+@@ -26,6 +26,11 @@
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -1403,7 +1403,7 @@
 */
 
 #include "config.h"
-@@ -39,12 +48,49 @@
+@@ -43,12 +48,49 @@
 #include "a52.h"
 #include "a52_internal.h"
 #include "mm_accel.h"
@@ -1453,7 +1453,7 @@
 static uint8_t fftorder[] = {
       0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
       8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
-@@ -56,6 +102,40 @@
+@@ -60,6 +102,40 @@
       6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
 };
 
@@ -1494,7 +1494,7 @@
 /* Root values for IFFT */
 static sample_t roots16[3];
 static sample_t roots32[7];
-@@ -241,7 +321,7 @@
+@@ -245,7 +321,7 @@
     ifft_pass (buf, roots128 - 32, 32);
 }
 
@@ -1503,7 +1503,7 @@
 {
     int i, k;
     sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
-@@ -285,6 +365,701 @@
+@@ -289,6 +365,701 @@
     }
 }
 
@@ -1892,7 +1892,7 @@
 +
 +    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 +    /* Bit reversed shuffling */
-+    asm volatile(
++    __asm__ volatile(
 +   "xor %%"REG_S", %%"REG_S" \n\t"
 +   "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
 +   "mov $1008, %%"REG_D" \n\t"
@@ -1952,7 +1952,7 @@
 +
 +    /* 1. iteration */
 +    // Note w[0][0]={1,0}
-+    asm volatile(
++    __asm__ volatile(
 +   "xorps %%xmm1, %%xmm1 \n\t"
 +   "xorps %%xmm2, %%xmm2 \n\t"
 +   "mov %0, %%"REG_S" \n\t"
@@ -1974,7 +1974,7 @@
 +
 +    /* 2. iteration */
 +    // Note w[1]={{1,0}, {0,-1}}
-+    asm volatile(
++    __asm__ volatile(
 +   "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
 +   "mov %0, %%"REG_S" \n\t"
 +   ASMALIGN(4)
@@ -2002,7 +2002,7 @@
 + Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
 + Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
 +*/
-+    asm volatile(
++    __asm__ volatile(
 +   "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
 +   "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
 +   "xorps %%xmm5, %%xmm5 \n\t"
@@ -2047,7 +2047,7 @@
 +        two_m_plus_one = two_m<<1;
 +        two_m_plus_one_shl3 = (two_m_plus_one<<3);
 +        buf_offset = buf+128;
-+        asm volatile(
++        __asm__ volatile(
 +   "mov %0, %%"REG_S" \n\t"
 +   ASMALIGN(4)
 +   "1: \n\t"
@@ -2079,7 +2079,7 @@
 +    }
 +
 +    /* Post IFFT complex multiply plus IFFT complex conjugate*/
-+    asm volatile(
++    __asm__ volatile(
 +   "mov $-1024, %%"REG_S" \n\t"
 +   ASMALIGN(4)
 +   "1: \n\t"
@@ -2102,7 +2102,7 @@
 +    window_ptr = a52_imdct_window;
 +
 +    /* Window and convert to real valued signal */
-+    asm volatile(
++    __asm__ volatile(
 +   "xor %%"REG_D", %%"REG_D" \n\t" // 0
 +   "xor %%"REG_S", %%"REG_S" \n\t" // 0
 +   "movss %3, %%xmm2 \n\t" // bias
@@ -2129,7 +2129,7 @@
 +    delay_ptr+=128;
 +// window_ptr+=128;
 +
-+    asm volatile(
++    __asm__ volatile(
 +   "mov $1024, %%"REG_D" \n\t" // 512
 +   "xor %%"REG_S", %%"REG_S" \n\t" // 0
 +   "movss %3, %%xmm2 \n\t" // bias
@@ -2158,7 +2158,7 @@
 +    /* The trailing edge of the window goes into the delay line */
 +    delay_ptr = delay;
 +
-+    asm volatile(
++    __asm__ volatile(
 +   "xor %%"REG_D", %%"REG_D" \n\t" // 0
 +   "xor %%"REG_S", %%"REG_S" \n\t" // 0
 +   ASMALIGN(4)
@@ -2180,7 +2180,7 @@
 +    delay_ptr+=128;
 +// window_ptr-=128;
 +
-+    asm volatile(
++    __asm__ volatile(
 +   "mov $1024, %%"REG_D" \n\t" // 1024
 +   "xor %%"REG_S", %%"REG_S" \n\t" // 0
 +   ASMALIGN(4)
@@ -2205,7 +2205,7 @@
 void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
 {
     int i, k;
-@@ -364,7 +1145,7 @@
+@@ -368,7 +1145,7 @@
 
 void a52_imdct_init (uint32_t mm_accel)
 {
@@ -2214,7 +2214,7 @@
     double sum;
 
     /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
-@@ -416,6 +1197,99 @@
+@@ -420,6 +1197,99 @@
 	post2[i].real = cos ((M_PI / 128) * (i + 0.5));
 	post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
     }
@@ -2314,7 +2314,7 @@
 
 #ifdef LIBA52_DJBFFT
     if (mm_accel & MM_ACCEL_DJBFFT) {
-@@ -426,7 +1300,5 @@
+@@ -430,7 +1300,5 @@
 #endif
     {
 	fprintf (stderr, "No accelerated IMDCT transform found\n");
@@ -2324,7 +2324,7 @@
 }
 --- include/mm_accel.h	2006-06-12 15:05:00.000000000 +0200
 +++ liba52/mm_accel.h	2006-06-05 02:23:04.000000000 +0200
-@@ -30,7 +34,12 @@
+@@ -34,7 +34,12 @@
 /* x86 accelerations */
 #define MM_ACCEL_X86_MMX	0x80000000
 #define MM_ACCEL_X86_3DNOW	0x40000000
@@ -38,7 +38,7 @@ static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF000
 
 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-512, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "movq "MANGLE(wm1100)", %%mm3 \n\t"
@@ -77,7 +77,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
 #ifdef HAVE_SSE
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "1: \n\t"
    "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
@@ -93,7 +93,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
    :: "r" (s16+512), "r" (f+256)
    :"%"REG_S, "memory"
    );*/
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "1: \n\t"
@@ -123,7 +123,7 @@ static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "pxor %%mm6, %%mm6 \n\t"
@@ -177,7 +177,7 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "1: \n\t"
@@ -228,7 +228,7 @@ static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "1: \n\t"
@@ -287,7 +287,7 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "pxor %%mm6, %%mm6 \n\t"
@@ -327,7 +327,7 @@ static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "pxor %%mm6, %%mm6 \n\t"
@@ -365,7 +365,7 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    "pxor %%mm6, %%mm6 \n\t"
@@ -405,7 +405,7 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    // "pxor %%mm6, %%mm6 \n\t"
@@ -451,7 +451,7 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
 
 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
     int32_t * f = (int32_t *) _f;
-    asm volatile(
+    __asm__ volatile(
    "mov $-1024, %%"REG_S" \n\t"
    "movq "MANGLE(magicF2W)", %%mm7 \n\t"
    // "pxor %%mm6, %%mm6 \n\t"
@@ -46,7 +46,7 @@ void mp_msg( int x, const char *format, ... ) // stub for cpudetect.c
 static inline long long rdtsc()
 {
     long long l;
-    asm volatile( "rdtsc\n\t"
+    __asm__ volatile("rdtsc\n\t"
    : "=A" (l)
    );
 // printf("%d\n", int(l/1000));
@@ -374,10 +374,10 @@ void *decode_video(sh_video_t *sh_video, unsigned char *start, int in_size,
 // some codecs are broken, and doesn't restore MMX state :(
 // it happens usually with broken/damaged files.
 if (gCpuCaps.has3DNow) {
-    __asm __volatile ("femms\n\t":::"memory");
+    __asm__ __volatile ("femms\n\t":::"memory");
 }
 else if (gCpuCaps.hasMMX) {
-    __asm __volatile ("emms\n\t":::"memory");
+    __asm__ __volatile ("emms\n\t":::"memory");
 }
 #endif
@@ -14,7 +14,7 @@
 static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
 {
     int ret;
-    asm volatile (
+    __asm__ volatile (
    "movl $4, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t"
    "pxor %%mm7, %%mm7 \n\t"
@@ -61,7 +61,7 @@ static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
 static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
 {
     int ret;
-    asm volatile (
+    __asm__ volatile (
    "movl $4, %%ecx \n\t"
    "pxor %%mm6, %%mm6 \n\t"
    "pxor %%mm7, %%mm7 \n\t"
@@ -150,7 +150,7 @@ static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
 static int var_y_mmx(unsigned char *a, unsigned char *b, int s)
 {
     int ret;
-    asm volatile (
+    __asm__ volatile (
    "movl $3, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t"
    "pxor %%mm7, %%mm7 \n\t"
@@ -23,7 +23,7 @@ struct vf_priv_s {
 static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
 {
     volatile short out[4];
-    asm (
+    __asm__ (
    "movl $8, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t"
    "pxor %%mm7, %%mm7 \n\t"
@@ -37,7 +37,7 @@ struct vf_priv_s
 static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
 {
     volatile short out[4];
-    asm (
+    __asm__ (
    "movl $8, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t"
    "pxor %%mm7, %%mm7 \n\t"
@@ -44,7 +44,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
     contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
 
     while (h--) {
-        asm volatile (
+        __asm__ volatile (
    "movq (%5), %%mm3 \n\t"
    "movq (%6), %%mm4 \n\t"
    "pxor %%mm0, %%mm0 \n\t"
@@ -82,7 +82,7 @@ static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, in
         src += sstep;
         dest += dstep;
     }
-    asm volatile ( "emms \n\t" ::: "memory" );
+    __asm__ volatile ( "emms \n\t" ::: "memory" );
 }
 #endif
@@ -130,7 +130,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
     dstep = dstride - w;
 
     while (h-- > 0) {
-        asm volatile (
+        __asm__ volatile (
    "movq (%5), %%mm3 \n\t"
    "movq (%6), %%mm4 \n\t"
    "pxor %%mm0, %%mm0 \n\t"
@@ -170,7 +170,7 @@ void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
         dst += dstep;
     }
 
-    asm volatile ( "emms \n\t" ::: "memory" );
+    __asm__ volatile ( "emms \n\t" ::: "memory" );
 }
 #endif
@@ -368,13 +368,13 @@ block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
 #define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
 
 #define BLOCK_METRICS_TEMPLATE() \
-    asm volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \
+    __asm__ volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \
                  "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \
        ); \
     a -= as; \
     b -= bs; \
     do { \
-        asm volatile( \
+        __asm__ volatile( \
    "movq (%0,%2), %%mm0\n\t" \
    "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \
    PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
@@ -439,7 +439,7 @@ block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
     static const unsigned long long ones = 0x0101010101010101ull;
 
     BLOCK_METRICS_TEMPLATE();
-    asm volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
+    __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
     get_block_stats(&tm, p, s);
 #endif
     return tm;
@@ -471,7 +471,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
 #ifdef DEBUG
     struct frame_stats ts = *s;
 #endif
-    asm volatile("prefetcht0 (%0,%2)\n\t"
+    __asm__ volatile("prefetcht0 (%0,%2)\n\t"
                  "prefetcht0 (%1,%3)\n\t" :
                  : "r" (a), "r" (b),
                  "r" (prefetch_line * as), "r" (prefetch_line * bs));
@@ -479,7 +479,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
     BLOCK_METRICS_TEMPLATE();
 
     s->num_blocks++;
-    asm volatile(
+    __asm__ volatile(
    "movq %3, %%mm0\n\t"
    "movq %%mm7, %%mm1\n\t"
    "psubusw %%mm0, %%mm1\n\t"
@@ -525,7 +525,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
         s->interlaced_high += interlaced >> 16;
         s->interlaced_low += interlaced;
     } else {
-        asm volatile(
+        __asm__ volatile(
    "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
    "psubw %%mm0, %%mm4\n\t"
    "psubw %%mm0, %%mm5\n\t"
@@ -539,7 +539,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
    : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
    );
 
-        asm volatile(
+        __asm__ volatile(
    "pshufw $0, %2, %%mm0\n\t"
    "psubusw %%mm7, %%mm0\n\t"
    "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
@@ -556,7 +556,7 @@ block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
    );
     }
 
-    asm volatile(
+    __asm__ volatile(
    "movq %%mm7, (%1)\n\t"
    PMAXUW((%0), %%mm7)
    "movq %%mm7, (%0)\n\t"
@@ -597,7 +597,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
 #else
     unsigned long len = (w+7) >> 3;
     int ret;
-    asm volatile (
+    __asm__ volatile (
    "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
    "movd %0, %%mm7 \n\t"
    "punpcklbw %%mm7, %%mm7 \n\t"
@@ -607,7 +607,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
    : "rm" (t)
    );
     do {
-        asm volatile (
+        __asm__ volatile (
    "movq (%0), %%mm0\n\t"
    "movq (%0,%3,2), %%mm1\n\t"
    "movq %%mm0, (%2)\n\t"
@@ -639,7 +639,7 @@ dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
         dst += 8;
     } while (--len);
 
-    asm volatile ("pxor %%mm7, %%mm7 \n\t"
+    __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
                   "psadbw %%mm6, %%mm7 \n\t"
                   "movd %%mm7, %0 \n\t"
                   "emms \n\t"
@@ -187,7 +187,7 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long sr
     width = (width+7)&~7;
     dst_stride-=width;
     //src_stride=(src_stride-width)*2;
-    asm volatile(
+    __asm__ volatile(
    "mov %5, %%"REG_d" \n\t"
    "mov %6, %%"REG_S" \n\t"
    "mov %7, %%"REG_D" \n\t"
@@ -255,7 +255,7 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s
     width = (width+7)&~7;
     dst_stride-=width;
     //src_stride=(src_stride-width)*2;
-    asm volatile(
+    __asm__ volatile(
    "mov %5, %%"REG_d" \n\t"
    "mov %6, %%"REG_S" \n\t"
    "mov %7, %%"REG_D" \n\t"
@@ -318,7 +318,7 @@ static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long s
 static void mul_thrmat_mmx(struct vf_priv_s *p, int q)
 {
     uint64_t *adr=&p->threshold_mtx_noq[0];
-    asm volatile(
+    __asm__ volatile(
    "movd %0, %%mm7 \n\t"
    "add $8*8*2, %%"REG_D" \n\t"
    "movq 0*8(%%"REG_S"), %%mm0 \n\t"
@@ -558,10 +558,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
     }
 
 #ifdef HAVE_MMX
-    if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");
+    if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
 #endif
 #ifdef HAVE_MMX2
-    if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");
+    if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
 #endif
     return vf_next_put_image(vf,dmpi, pts);
 }
@@ -868,7 +868,7 @@ static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int
 static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
 {
     uint64_t __attribute__((aligned(8))) temps[4];
-    asm volatile(
+    __asm__ volatile(
    ASMALIGN(4)
    "1: \n\t"
    "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
@@ -1669,7 +1669,7 @@ static void row_idct_mmx (DCTELEM* workspace,
                           int16_t* output_adr, int output_stride, int cnt)
 {
     uint64_t __attribute__((aligned(8))) temps[4];
-    asm volatile(
+    __asm__ volatile(
    "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
    "1: \n\t"
    "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t"
@@ -1935,7 +1935,7 @@ static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int
 static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt)
 {
     uint64_t __attribute__((aligned(8))) temps[4];
-    asm volatile(
+    __asm__ volatile(
    "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
    "6: \n\t"
    "movd (%%"REG_S"), %%mm0 \n\t"
@@ -37,7 +37,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
     vinc = srcstride[2] - w/2;
 
     for (h/=2; h; h--) {
-        asm (
+        __asm__ (
    "pxor %%mm0, %%mm0 \n\t"
    ASMALIGN(4)
    "1: \n\t"
@@ -99,7 +99,7 @@ static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
         v += vinc;
         dst += dstinc;
     }
-    asm volatile ( "emms \n\t" ::: "memory" );
+    __asm__ volatile ( "emms \n\t" ::: "memory" );
 }
 #endif
@@ -61,7 +61,7 @@ static void pack_li_1_C(unsigned char *dst, unsigned char *y,
 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
                         unsigned char *u, unsigned char *v, int w)
 {
-    asm volatile (""
+    __asm__ volatile (""
    ASMALIGN(4)
    "1: \n\t"
    "movq (%0), %%mm1 \n\t"
@@ -91,7 +91,7 @@ static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
                           unsigned char *u, unsigned char *v, int w, int us, int vs)
 {
-    asm volatile (""
+    __asm__ volatile (""
    "push %%"REG_BP" \n\t"
 #ifdef ARCH_X86_64
    "mov %6, %%"REG_BP" \n\t"
@@ -199,7 +199,7 @@ static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
                           unsigned char *u, unsigned char *v, int w, int us, int vs)
 {
-    asm volatile (""
+    __asm__ volatile (""
    "push %%"REG_BP" \n\t"
 #ifdef ARCH_X86_64
    "mov %6, %%"REG_BP" \n\t"
@@ -46,7 +46,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
     int i;
     short out[24]; // output buffer for the partial metrics from the mmx code
 
-    asm (
+    __asm__ (
    "movl $4, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t" // 4 even difference sums
    "pxor %%mm5, %%mm5 \n\t" // 4 odd difference sums
@@ -105,7 +105,7 @@ static void block_diffs_MMX(struct metrics *m, unsigned char *old, unsigned char
     m->o = out[4]+out[5]+out[6]+out[7];
     m->d = m->e + m->o;
 
-    asm (
+    __asm__ (
    // First loop to measure first four columns
    "movl $4, %%ecx \n\t"
    "pxor %%mm4, %%mm4 \n\t" // Past spacial noise
@@ -150,7 +150,7 @@ static inline void lineNoise_MMX(uint8_t *dst, uint8_t *src, int8_t *noise, int
     long mmx_len= len&(~7);
     noise+=shift;
 
-    asm volatile(
+    __asm__ volatile(
    "mov %3, %%"REG_a" \n\t"
    "pcmpeqb %%mm7, %%mm7 \n\t"
    "psllw $15, %%mm7 \n\t"
@@ -179,7 +179,7 @@ static inline void lineNoise_MMX2(uint8_t *dst, uint8_t *src, int8_t *noise, int
     long mmx_len= len&(~7);
     noise+=shift;
 
-    asm volatile(
+    __asm__ volatile(
    "mov %3, %%"REG_a" \n\t"
    "pcmpeqb %%mm7, %%mm7 \n\t"
    "psllw $15, %%mm7 \n\t"
@@ -220,7 +220,7 @@ static inline void lineNoise_C(uint8_t *dst, uint8_t *src, int8_t *noise, int le
 static inline void lineNoiseAvg_MMX(uint8_t *dst, uint8_t *src, int len, int8_t **shift){
     long mmx_len= len&(~7);
 
-    asm volatile(
+    __asm__ volatile(
    "mov %5, %%"REG_a" \n\t"
    ASMALIGN(4)
    "1: \n\t"
@@ -357,10 +357,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
     vf_clone_mpi_attributes(dmpi, mpi);
 
 #ifdef HAVE_MMX
-    if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");
+    if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
 #endif
 #ifdef HAVE_MMX2
-    if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");
+    if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
 #endif
 
     return vf_next_put_image(vf,dmpi, pts);
@@ -157,7 +157,7 @@ static void dctB_c(DCTELEM *dst, DCTELEM *src){
 
 #ifdef HAVE_MMX
 static void dctB_mmx(DCTELEM *dst, DCTELEM *src){
-    asm volatile (
+    __asm__ volatile (
    "movq (%0), %%mm0 \n\t"
    "movq 1*4*2(%0), %%mm1 \n\t"
    "paddw 6*4*2(%0), %%mm0 \n\t"
@@ -398,10 +398,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
     }
 
 #ifdef HAVE_MMX
-    if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");
+    if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
 #endif
 #ifdef HAVE_MMX2
-    if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");
+    if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
 #endif
 
     return vf_next_put_image(vf,dmpi, pts);
@@ -140,7 +140,7 @@ static int config(struct vf_instance_s* vf,
                   unsigned int flags, unsigned int outfmt){
 
     int sw, sh;
-//  asm volatile("emms\n\t");
+//  __asm__ volatile("emms\n\t");
     allocStuff(&vf->priv->luma, width, height);
 
     getSubSampleFactors(&sw, &sh, outfmt);
@@ -558,7 +558,7 @@ void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, Sw
 
 #ifdef ARCH_X86
     if(gCpuCaps.hasMMX)
-        asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
+        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
 #endif
     if(firstTime)
     {
@@ -153,7 +153,7 @@ static void hardthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
 
     threshold1= qp*((1<<4) - bias) - 1;
 
-    asm volatile(
+    __asm__ volatile(
 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
    "movq " #src0 ", %%mm0 \n\t"\
    "movq " #src1 ", %%mm1 \n\t"\
@@ -221,7 +221,7 @@ static void softthresh_mmx(DCTELEM dst[64], DCTELEM src[64], int qp, uint8_t *pe
 
     threshold1= qp*((1<<4) - bias) - 1;
 
-    asm volatile(
+    __asm__ volatile(
 #undef REQUANT_CORE
 #define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
    "movq " #src0 ", %%mm0 \n\t"\
@@ -334,7 +334,7 @@ static void store_slice_mmx(uint8_t *dst, int16_t *src, int dst_stride, int src_
     for(y=0; y<height; y++){
         uint8_t *dst1= dst;
         int16_t *src1= src;
-        asm volatile(
+        __asm__ volatile(
    "movq (%3), %%mm3 \n\t"
    "movq (%3), %%mm4 \n\t"
    "movd %4, %%mm2 \n\t"
@@ -500,10 +500,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
     }
 
 #ifdef HAVE_MMX
-    if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");
+    if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
 #endif
 #ifdef HAVE_MMX2
-    if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");
+    if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
 #endif
 
     return vf_next_put_image(vf,dmpi, pts);
@@ -58,7 +58,7 @@ static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int
         s += ss;
     }
     for (i=h-1; i; i--) {
-        asm volatile(
+        __asm__ volatile(
    "1: \n\t"
    "movq (%%"REG_S"), %%mm0 \n\t"
    "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
@@ -78,7 +78,7 @@ static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int
         s += ss;
     }
     if (!up) fast_memcpy(d, s, w);
-    asm volatile("emms \n\t" : : : "memory");
+    __asm__ volatile("emms \n\t" : : : "memory");
 }
 #endif
 
@@ -94,7 +94,7 @@ static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int d
         s += ss;
     }
     for (i=h-1; i; i--) {
-        asm volatile(
+        __asm__ volatile(
    "pxor %%mm7, %%mm7 \n\t"
    "2: \n\t"
    "movq (%%"REG_S"), %%mm0 \n\t"
@@ -115,7 +115,7 @@ static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int d
         s += ss;
     }
     if (!up) fast_memcpy(d, s, w);
-    asm volatile("emms \n\t" : : : "memory");
+    __asm__ volatile("emms \n\t" : : : "memory");
 }
 #endif
 
@@ -131,7 +131,7 @@ static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds
         s += ss;
     }
     for (i=h-1; i; i--) {
-        asm volatile(
+        __asm__ volatile(
    "pxor %%mm7, %%mm7 \n\t"
    "3: \n\t"
    "movq (%%"REG_S"), %%mm0 \n\t"
@@ -165,7 +165,7 @@ static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds
         s += ss;
     }
     if (!up) fast_memcpy(d, s, w);
-    asm volatile("emms \n\t" : : : "memory");
+    __asm__ volatile("emms \n\t" : : : "memory");
 }
 
 static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
@@ -185,7 +185,7 @@ static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int
         d[j] = (s[j+ssd] + 3*s[j])>>2;
     d += ds; s += ss;
     for (i=h-3; i; i--) {
-        asm volatile(
+        __asm__ volatile(
    "pxor %%mm0, %%mm0 \n\t"
    "movq (%%"REG_d"), %%mm4 \n\t"
    "movq 8(%%"REG_d"), %%mm5 \n\t"
@@ -245,7 +245,7 @@ static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int
         d[j] = (s[j+ssd] + 3*s[j])>>2;
     d += ds; s += ss;
     if (!up) fast_memcpy(d, s, w);
-    asm volatile("emms \n\t" : : : "memory");
+    __asm__ volatile("emms \n\t" : : : "memory");
 }
 #endif
@ -194,11 +194,11 @@ static int put_image( struct vf_instance_s* vf, mp_image_t *mpi, double pts) {
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
if(gCpuCaps.hasMMX)
|
||||
asm volatile ("emms\n\t");
|
||||
__asm__ volatile ("emms\n\t");
|
||||
#endif
|
||||
#ifdef HAVE_MMX2
|
||||
if(gCpuCaps.hasMMX2)
|
||||
asm volatile ("sfence\n\t");
|
||||
__asm__ volatile ("sfence\n\t");
|
||||
#endif
|
||||
|
||||
return vf_next_put_image( vf, dmpi, pts);
|
||||
|
@ -292,10 +292,10 @@ static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){
|
||||
}
|
||||
|
||||
#ifdef HAVE_MMX
|
||||
if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");
|
||||
if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
|
||||
#endif
|
||||
#ifdef HAVE_MMX2
|
||||
if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");
|
||||
if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
|
||||
#endif
|
||||
|
||||
return vf_next_put_image(vf,dmpi, pts);
|
||||
|
@ -142,7 +142,7 @@ static void filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, u
|
||||
|
||||
#define FILTER\
|
||||
for(x=0; x<w; x+=4){\
|
||||
asm volatile(\
|
||||
__asm__ volatile(\
|
||||
"pxor %%mm7, %%mm7 \n\t"\
|
||||
LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
|
||||
LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
|
||||
@ -259,7 +259,7 @@ static void filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, u
|
||||
[pb1] "m"(pb_1),\
|
||||
[mode] "g"(mode)\
|
||||
);\
|
||||
asm volatile("movd %%mm1, %0" :"=m"(*dst));\
|
||||
__asm__ volatile("movd %%mm1, %0" :"=m"(*dst));\
|
||||
dst += 4;\
|
||||
prev+= 4;\
|
||||
cur += 4;\
|
||||
@ -370,7 +370,7 @@ static void filter(struct vf_priv_s *p, uint8_t *dst[3], int dst_stride[3], int
|
||||
}
|
||||
}
|
||||
#if defined(HAVE_MMX) && defined(NAMED_ASM_ARGS)
|
||||
if(gCpuCaps.hasMMX2) asm volatile("emms \n\t" : : : "memory");
|
||||
if(gCpuCaps.hasMMX2) __asm__ volatile("emms \n\t" : : : "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
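The [cur], [mrefs], [pb1] and [mode] operands above use GCC's named-operand
inline-asm syntax, which is presumably why the block is guarded by
NAMED_ASM_ARGS: older compilers only accept the numbered %0, %1 form. A
minimal sketch with hypothetical names:

    static inline int add_offset(int base, int off)
    {
        int r;
        __asm__("lea (%[b],%[o]), %[out]"   /* r = base + off */
                : [out] "=r" (r)
                : [b] "r" (base), [o] "r" (off));
        return r;
    }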
@ -256,7 +256,7 @@ static void * RENAME(fast_memcpy)(void * to, const void * from, size_t len)
// printf(" %d %d\n", (int)from&1023, (int)to&1023);
// Pure Assembly cuz gcc is a bit unpredictable ;)
if(i>=BLOCK_SIZE/64)
asm volatile(
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
ASMALIGN(4)
"1: \n\t"
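ASMALIGN(4) in the hunk above hides an assembler portability wart: some
assemblers read ".align n" as n bytes, others as 2^n bytes. A sketch of how
such a macro is commonly defined (the ASMALIGN_POT config name is an
assumption, not quoted from this tree):

    #ifdef ASMALIGN_POT                /* assembler reads .align n as 2^n bytes */
    #  define ASMALIGN(bits) ".align " #bits "\n\t"
    #else                              /* assembler reads .align n as n bytes */
    #  define ASMALIGN(bits) ".align 1<<" #bits "\n\t"
    #endif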
@ -33,7 +33,7 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
w=w>>1;
#endif
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
"pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm4\n\t"
"movq %%mm5, %%mm7\n\t"
@ -44,13 +44,13 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
for(y=0;y<h;y++){
register int x;
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=8){
asm volatile(
__asm__ volatile(
"movl %1, %%eax\n\t"
"orl 4%1, %%eax\n\t"
" jz 1f\n\t"
@ -92,7 +92,7 @@ static inline void RENAME(vo_draw_alpha_yv12)(int w,int h, unsigned char* src, u
dstbase+=dststride;
}
#ifdef HAVE_MMX
asm volatile(EMMS:::"memory");
__asm__ volatile(EMMS:::"memory");
#endif
return;
}
@ -103,7 +103,7 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
w=w>>1;
#endif
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm6\n\t"
@ -115,13 +115,13 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
for(y=0;y<h;y++){
register int x;
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src));
for(x=0;x<w;x+=4){
asm volatile(
__asm__ volatile(
"movl %1, %%eax\n\t"
"orl %%eax, %%eax\n\t"
" jz 1f\n\t"
@ -164,7 +164,7 @@ static inline void RENAME(vo_draw_alpha_yuy2)(int w,int h, unsigned char* src, u
dstbase+=dststride;
}
#ifdef HAVE_MMX
asm volatile(EMMS:::"memory");
__asm__ volatile(EMMS:::"memory");
#endif
return;
}
@ -196,7 +196,7 @@ static inline void RENAME(vo_draw_alpha_uyvy)(int w,int h, unsigned char* src, u
static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
int y;
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
::);
@ -206,14 +206,14 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
register int x;
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
asm volatile(
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dst),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=2){
if(srca[x] || srca[x+1])
asm volatile(
__asm__ volatile(
PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t"
@ -250,7 +250,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
#else /* HAVE_MMX */
for(x=0;x<w;x++){
if(srca[x]){
asm volatile(
__asm__ volatile(
"movzbl (%0), %%ecx\n\t"
"movzbl 1(%0), %%eax\n\t"

@ -296,7 +296,7 @@ static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src,
dstbase+=dststride;
}
#ifdef HAVE_MMX
asm volatile(EMMS:::"memory");
__asm__ volatile(EMMS:::"memory");
#endif
return;
}
@ -308,12 +308,12 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#endif
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
asm volatile(
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm6, %%mm6\n\t" // F..F
::);
#else /* HAVE_3DNOW */
asm volatile(
__asm__ volatile(
"pxor %%mm7, %%mm7\n\t"
"pcmpeqb %%mm5, %%mm5\n\t" // F..F
"movq %%mm5, %%mm4\n\t"
@ -327,14 +327,14 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#if defined(ARCH_X86) && (!defined(ARCH_X86_64) || defined(HAVE_MMX))
#ifdef HAVE_MMX
#ifdef HAVE_3DNOW
asm volatile(
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=2){
if(srca[x] || srca[x+1])
asm volatile(
__asm__ volatile(
PREFETCHW" 32%0\n\t"
PREFETCH" 32%1\n\t"
PREFETCH" 32%2\n\t"
@ -362,13 +362,13 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
:: "m" (dstbase[4*x]), "m" (srca[x]), "m" (src[x]));
}
#else //this is faster for intels crap
asm volatile(
__asm__ volatile(
PREFETCHW" %0\n\t"
PREFETCH" %1\n\t"
PREFETCH" %2\n\t"
::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
for(x=0;x<w;x+=4){
asm volatile(
__asm__ volatile(
"movl %1, %%eax\n\t"
"orl %%eax, %%eax\n\t"
" jz 1f\n\t"
@ -417,7 +417,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
#else /* HAVE_MMX */
for(x=0;x<w;x++){
if(srca[x]){
asm volatile(
__asm__ volatile(
"movzbl (%0), %%ecx\n\t"
"movzbl 1(%0), %%eax\n\t"
"movzbl 2(%0), %%edx\n\t"
@ -461,7 +461,7 @@ static inline void RENAME(vo_draw_alpha_rgb32)(int w,int h, unsigned char* src,
dstbase+=dststride;
}
#ifdef HAVE_MMX
asm volatile(EMMS:::"memory");
__asm__ volatile(EMMS:::"memory");
#endif
return;
}

@ -4569,7 +4569,7 @@ static INT WINAPI expMessageBoxA(HWND hWnd, LPCSTR text, LPCSTR title, UINT type
void exp_EH_prolog(void *dest);
//! just a dummy function that acts a container for the asm section
void exp_EH_prolog_dummy(void) {
asm volatile (
__asm__ volatile (
// take care, this "function" may not change flags or
// registers besides eax (which is also why we can't use
// exp_EH_prolog_dummy directly)
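The "dummy function" comment above describes a common trick: a hand-written
entry point is emitted from inside a C function body so the compiler carries
it through to the object file. A rough sketch of the pattern (hypothetical
symbols; 32-bit, non-PIC):

    void raw_entry(void);              /* label defined by the asm below */

    void raw_entry_container(void)     /* never called; only holds the asm */
    {
        __asm__ volatile(
            ".globl raw_entry \n\t"
            "raw_entry:       \n\t"
            "movl $1, %eax    \n\t"    /* trivial body: return 1 */
            "ret              \n\t");
    }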
@ -693,7 +693,7 @@ typedef HANDLE *PHANDLE;
#else /* __GNUC__ */
# define ASM_GLOBAL_FUNC(name,code) \
void asm_dummy_##name(void) { \
asm( ".align 4\n\t" \
__asm__( ".align 4\n\t" \
".globl " ASM_NAME(#name) "\n\t" \
".type " ASM_NAME(#name) ",@function\n" \
ASM_NAME(#name) ":\n\t" \
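Judging from the visible part of the macro, ASM_GLOBAL_FUNC packages that same
container trick into one line. A hypothetical use (the body string is invented
for illustration):

    /* expands to a dummy C function whose asm block defines the global
       symbol return_zero with the given instruction sequence */
    ASM_GLOBAL_FUNC(return_zero,
                    "xorl %eax, %eax\n\t"
                    "ret")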
@ -18,7 +18,7 @@ static float attribute_used plus_1f = 1.0;
void dct64_MMX_3dnow(short *a,short *b,real *c)
{
char tmp[256];
__asm __volatile(
__asm__ __volatile(
" movl %2,%%eax\n\t"

" leal 128+%3,%%edx\n\t"

@ -18,7 +18,7 @@ static float attribute_used plus_1f = 1.0;
void dct64_MMX_3dnowex(short *a,short *b,real *c)
{
char tmp[256];
__asm __volatile(
__asm__ __volatile(
" movl %2,%%eax\n\t"

" leal 128+%3,%%edx\n\t"

@ -9,7 +9,7 @@
void dct64_MMX(short *a,short *b,real *c)
{
char tmp[256];
__asm __volatile(
__asm__ __volatile(
" movl %2,%%eax\n\t"
/* Phase 1*/
" flds (%%eax)\n\t"

@ -32,7 +32,7 @@ void dct64_sse(short *out0,short *out1,real *c)

for (i = 0; i < 0x20 / 2; i += 4)
{
asm(
__asm__(
"movaps %2, %%xmm3\n\t"
"shufps $27, %%xmm3, %%xmm3\n\t"
"movaps %3, %%xmm1\n\t"
@ -57,7 +57,7 @@ void dct64_sse(short *out0,short *out1,real *c)

for (i = 0; i < 0x20; i += 0x10)
{
asm(
__asm__(
"movaps %4, %%xmm1\n\t"
"movaps %5, %%xmm3\n\t"
"movaps %6, %%xmm4\n\t"
@ -86,7 +86,7 @@ void dct64_sse(short *out0,short *out1,real *c)

{
real *costab = costab_mmx + 16;
asm(
__asm__(
"movaps %4, %%xmm0\n\t"
"movaps %5, %%xmm1\n\t"
"movaps %8, %%xmm4\n\t"
@ -116,7 +116,7 @@ void dct64_sse(short *out0,short *out1,real *c)
real *costab = costab_mmx + 24;
int i;

asm(
__asm__(
"movaps %0, %%xmm0\n\t"
"shufps $27, %%xmm0, %%xmm0\n\t"
"movaps %1, %%xmm5\n\t"
@ -127,7 +127,7 @@ void dct64_sse(short *out0,short *out1,real *c)

for (i = 0; i < 0x20; i += 8)
{
asm(
__asm__(
"movaps %2, %%xmm2\n\t"
"movaps %3, %%xmm3\n\t"
"movaps %%xmm2, %%xmm4\n\t"
@ -150,7 +150,7 @@ void dct64_sse(short *out0,short *out1,real *c)
{
int i;

asm(
__asm__(
"movss %0, %%xmm1\n\t"
"movss %1, %%xmm0\n\t"
"movaps %%xmm1, %%xmm3\n\t"
@ -166,7 +166,7 @@ void dct64_sse(short *out0,short *out1,real *c)

for (i = 0; i < 0x20; i += 8)
{
asm(
__asm__(
"movaps %2, %%xmm3\n\t"
"movaps %%xmm3, %%xmm4\n\t"
"shufps $20, %%xmm4, %%xmm4\n\t"
@ -190,7 +190,7 @@ void dct64_sse(short *out0,short *out1,real *c)

{
int i;
asm(
__asm__(
"movss %0, %%xmm0\n\t"
"movaps %%xmm1, %%xmm2\n\t"
"movaps %%xmm0, %%xmm7\n\t"
@ -204,7 +204,7 @@ void dct64_sse(short *out0,short *out1,real *c)

for (i = 0x8; i < 0x20; i += 8)
{
asm volatile (
__asm__ volatile (
"movaps %2, %%xmm1\n\t"
"movaps %%xmm1, %%xmm3\n\t"
"shufps $224, %%xmm3, %%xmm3\n\t"
@ -285,7 +285,7 @@ void dct64_sse(short *out0,short *out1,real *c)
To do saturation efficiently in x86 we can use fist(t)(p),
pf2iw, or packssdw. We use fist(p) here.
*/
asm(
__asm__(
"flds %0\n\t"
"flds (%2)\n\t"
"fadds 4(%2)\n\t"
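Of the three saturation options the comment above lists, packssdw is the
simplest to show in isolation; a small sketch (assumes MMX, not taken from
mp3lib):

    #include <stdint.h>

    /* clamp two 32-bit values into the int16 range with one instruction */
    static void sat_pair(int16_t out[4], const int32_t in[2])
    {
        __asm__ volatile(
            "movq     (%1), %%mm0  \n\t"
            "packssdw %%mm0, %%mm0 \n\t" /* 100000 -> 32767, -100000 -> -32768 */
            "movq     %%mm0, (%0)  \n\t"
            "emms                  \n\t"
            :: "r"(out), "r"(in)
            : "memory", "mm0");
    }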
@ -41,7 +41,7 @@ int synth_1to1_pent(real *bandPtr, int channel, short *samples)
{
real tmp[3];
register int retval;
__asm __volatile(
__asm__ __volatile(
" movl %%ebp,"MANGLE(saved_ebp)"\n\t"
" movl %1,%%eax\n\t"/*bandPtr*/
" movl %3,%%esi\n\t"

@ -217,7 +217,7 @@ int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
dct64_MMX_func(a, b, bandPtr);
window = mp3lib_decwins + 16 - bo1;
//printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
__asm __volatile(
__asm__ __volatile(
ASMALIGN(4)
".L03:\n\t"
"movq (%1),%%mm0\n\t"
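The MANGLE(saved_ebp) above papers over the leading-underscore difference in C
symbol names between object formats. A sketch of such a macro (the platform
test and the global are assumptions; 32-bit, non-PIC only):

    #if defined(__APPLE__) || defined(__MINGW32__)
    #  define MANGLE(sym) "_" #sym     /* Mach-O/PE prepend an underscore */
    #else
    #  define MANGLE(sym) #sym         /* ELF uses the plain name */
    #endif

    int saved_value;                   /* hypothetical global */

    static void save_value(int v)
    {
        __asm__ volatile("movl %0, " MANGLE(saved_value)
                         :: "r"(v) : "memory");
    }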
@ -99,11 +99,6 @@ void outl(U16_t, U32_t);

#else /* not _MINIX and _ACK */

# if defined(__STDC__) && (__STDC__ == 1)
# ifndef NCR
# define asm __asm
# endif
# endif
# ifdef SVR4
# include <sys/types.h>
# ifndef __USLC__
@ -115,8 +110,8 @@ void outl(U16_t, U32_t);
#else
# include "../common/scoasm.h"
#endif
#define intr_disable() asm("cli")
#define intr_enable() asm("sti")
#define intr_disable() __asm__("cli")
#define intr_enable() __asm__("sti")

#endif /* _MINIX and _ACK */
#endif /* __GNUC__ */

@ -62,7 +62,7 @@ static __inline__ int enable_os_io(void)

/* Calling callgate with function 13 sets IOPL for the program */

asm volatile ("movl $13,%%ebx;.byte 0xff,0x1d;.long _callgate"
__asm__ volatile ("movl $13,%%ebx;.byte 0xff,0x1d;.long _callgate"
: /*no outputs */
: /*no inputs */
: "eax","ebx","ecx","edx","cc");