From 8725da49a2090de05b4b2d05e33727f45cb9d970 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 25 Jun 2012 14:33:24 +0100 Subject: [PATCH 1/4] x86: fft: win64: fix stack alignment for memcpy() call --- libavcodec/x86/fft_mmx.asm | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm index 1cacfb7bd6..5143611533 100644 --- a/libavcodec/x86/fft_mmx.asm +++ b/libavcodec/x86/fft_mmx.asm @@ -640,19 +640,21 @@ cglobal fft_permute, 2,7,1 %if ARCH_X86_64 mov r0, r1 mov r1, r5 +%endif +%if WIN64 + sub rsp, 8 + call memcpy + add rsp, 8 + RET +%elif ARCH_X86_64 + jmp memcpy %else push r2 push r5 push r1 -%endif -%if ARCH_X86_64 && WIN64 == 0 - jmp memcpy -%else call memcpy -%if ARCH_X86_32 add esp, 12 -%endif - REP_RET + RET %endif cglobal imdct_calc, 3,5,3 From 963cdf39b4406ddc7ea124d71b8638b00da6c6a3 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 25 Jun 2012 13:43:32 +0100 Subject: [PATCH 2/4] x86: cpu: whitespace (mostly) cosmetics This adds whitespace around operators, aligns line continuation backslashes, and breaks long lines. Also fixes an ifdef halfway through a statement. The one line of duplication this saved is not worth the ugliness. Signed-off-by: Mans Rullgard --- libavutil/x86/cpu.c | 68 +++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index b87d3a3a92..f61add0ded 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -26,16 +26,15 @@ #include "libavutil/cpu.h" /* ebx saving is necessary for PIC. 
gcc seems unable to see it alone */ -#define cpuid(index,eax,ebx,ecx,edx)\ - __asm__ volatile\ - ("mov %%"REG_b", %%"REG_S"\n\t"\ - "cpuid\n\t"\ - "xchg %%"REG_b", %%"REG_S\ - : "=a" (eax), "=S" (ebx),\ - "=c" (ecx), "=d" (edx)\ - : "0" (index)); +#define cpuid(index, eax, ebx, ecx, edx) \ + __asm__ volatile ( \ + "mov %%"REG_b", %%"REG_S" \n\t" \ + "cpuid \n\t" \ + "xchg %%"REG_b", %%"REG_S \ + : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ + : "0" (index)) -#define xgetbv(index,eax,edx) \ +#define xgetbv(index, eax, edx) \ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index)) /* Function to test if multimedia instructions are supported... */ @@ -43,8 +42,8 @@ int ff_get_cpu_flags_x86(void) { int rval = 0; int eax, ebx, ecx, edx; - int max_std_level, max_ext_level, std_caps=0, ext_caps=0; - int family=0, model=0; + int max_std_level, max_ext_level, std_caps = 0, ext_caps = 0; + int family = 0, model = 0; union { int i[3]; char c[12]; } vendor; #if ARCH_X86_32 @@ -79,19 +78,20 @@ int ff_get_cpu_flags_x86(void) vendor.i[1] = edx; vendor.i[2] = ecx; - if(max_std_level >= 1){ + if (max_std_level >= 1) { cpuid(1, eax, ebx, ecx, std_caps); - family = ((eax>>8)&0xf) + ((eax>>20)&0xff); - model = ((eax>>4)&0xf) + ((eax>>12)&0xf0); + family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0); if (std_caps & (1 << 15)) rval |= AV_CPU_FLAG_CMOV; - if (std_caps & (1<<23)) + if (std_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; - if (std_caps & (1<<25)) - rval |= AV_CPU_FLAG_MMX2 + if (std_caps & (1 << 25)) + rval |= AV_CPU_FLAG_MMX2; #if HAVE_SSE - | AV_CPU_FLAG_SSE; - if (std_caps & (1<<26)) + if (std_caps & (1 << 25)) + rval |= AV_CPU_FLAG_SSE; + if (std_caps & (1 << 26)) rval |= AV_CPU_FLAG_SSE2; if (ecx & 1) rval |= AV_CPU_FLAG_SSE3; @@ -111,20 +111,19 @@ int ff_get_cpu_flags_x86(void) } #endif #endif - ; } cpuid(0x80000000, max_ext_level, ebx, ecx, edx); - if(max_ext_level >= 0x80000001){ + if (max_ext_level 
>= 0x80000001) { cpuid(0x80000001, eax, ebx, ecx, ext_caps); - if (ext_caps & (1U<<31)) + if (ext_caps & (1U << 31)) rval |= AV_CPU_FLAG_3DNOW; - if (ext_caps & (1<<30)) + if (ext_caps & (1 << 30)) rval |= AV_CPU_FLAG_3DNOWEXT; - if (ext_caps & (1<<23)) + if (ext_caps & (1 << 23)) rval |= AV_CPU_FLAG_MMX; - if (ext_caps & (1<<22)) + if (ext_caps & (1 << 22)) rval |= AV_CPU_FLAG_MMX2; /* Allow for selectively disabling SSE2 functions on AMD processors @@ -151,14 +150,17 @@ int ff_get_cpu_flags_x86(void) if (!strncmp(vendor.c, "GenuineIntel", 12)) { if (family == 6 && (model == 9 || model == 13 || model == 14)) { - /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah") - * theoretically support sse2, but it's usually slower than mmx, - * so let's just pretend they don't. AV_CPU_FLAG_SSE2 is disabled and - * AV_CPU_FLAG_SSE2SLOW is enabled so that SSE2 is not used unless - * explicitly enabled by checking AV_CPU_FLAG_SSE2SLOW. The same - * situation applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. */ - if (rval & AV_CPU_FLAG_SSE2) rval ^= AV_CPU_FLAG_SSE2SLOW|AV_CPU_FLAG_SSE2; - if (rval & AV_CPU_FLAG_SSE3) rval ^= AV_CPU_FLAG_SSE3SLOW|AV_CPU_FLAG_SSE3; + /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx, so let's just pretend they don't. + * AV_CPU_FLAG_SSE2 is disabled and AV_CPU_FLAG_SSE2SLOW is + * enabled so that SSE2 is not used unless explicitly enabled + * by checking AV_CPU_FLAG_SSE2SLOW. The same situation + * applies for AV_CPU_FLAG_SSE3 and AV_CPU_FLAG_SSE3SLOW. 
*/ + if (rval & AV_CPU_FLAG_SSE2) + rval ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2; + if (rval & AV_CPU_FLAG_SSE3) + rval ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3; } /* The Atom processor has SSSE3 support, which is useful in many cases, * but sometimes the SSSE3 version is slower than the SSE2 equivalent From 14a34d90ad1c8a47464937c6df4bda83451c07af Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Sun, 24 Jun 2012 20:37:25 -0400 Subject: [PATCH 3/4] lavr: x86: merge some branches --- libavresample/x86/audio_convert_init.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/libavresample/x86/audio_convert_init.c b/libavresample/x86/audio_convert_init.c index f41d974445..637fd2fb14 100644 --- a/libavresample/x86/audio_convert_init.c +++ b/libavresample/x86/audio_convert_init.c @@ -53,14 +53,6 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); } - if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { - ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); - } - if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { - ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, - 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); - } if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, @@ -80,12 +72,16 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); } if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, 0, 32, 
16, "AVX", ff_conv_s32_to_flt_avx); ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 32, 32, "AVX", ff_conv_flt_to_s32_avx); + ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, + 6, 16, 4, "AVX", ff_conv_fltp_to_flt_6ch_avx); } #endif } From 246154a9aff222b80befd65faf97494ce9113306 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 24 Jun 2012 20:57:35 +0300 Subject: [PATCH 4/4] log: Include io.h on windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for isatty, which exists on MSVC and is found by configure, but is provided by io.h instead of unistd.h. Signed-off-by: Martin Storsjö --- libavutil/log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavutil/log.c b/libavutil/log.c index e2773d433d..9f1d59ab9c 100644 --- a/libavutil/log.c +++ b/libavutil/log.c @@ -39,6 +39,7 @@ static int flags; #if defined(_WIN32) && !defined(__MINGW32CE__) #include <windows.h> +#include <io.h> static const uint8_t color[] = { 12, 12, 12, 14, 7, 10, 11 }; static int16_t background, attr_orig; static HANDLE con;