diff --git a/Makefile b/Makefile
index 357eb48db0..60613190b2 100644
--- a/Makefile
+++ b/Makefile
@@ -391,7 +391,7 @@ SRCS_COMMON-$(MP3LIB)                += libmpcodecs/ad_mp3lib.c mp3lib/sr1.c
 SRCS_COMMON-$(MP3LIB)-$(ARCH_X86_32) += mp3lib/decode_i586.c
 SRCS_COMMON-$(MP3LIB)-$(ARCH_X86_32)-$(HAVE_3DNOW)    += mp3lib/dct36_3dnow.c \
                                                          mp3lib/dct64_3dnow.c
-SRCS_COMMON-$(MP3LIB)-$(ARCH_X86_32)-$(HAVE_3DNOWEX)  += mp3lib/dct36_k7.c \
+SRCS_COMMON-$(MP3LIB)-$(ARCH_X86_32)-$(HAVE_3DNOWEXT) += mp3lib/dct36_k7.c \
                                                          mp3lib/dct64_k7.c
 SRCS_COMMON-$(MP3LIB)-$(ARCH_X86_32)-$(HAVE_MMX)      += mp3lib/dct64_mmx.c
 SRCS_COMMON-$(MP3LIB)-$(HAVE_ALTIVEC) += mp3lib/dct64_altivec.c
@@ -852,7 +852,7 @@ version.h:
 ###### dependency declarations / specific CFLAGS ######
 
 codec-cfg.o: codecs.conf.h
-mencoder.o mplayer.o vobsub.o gui/win32/gui.o libmpdemux/muxer_avi.o osdep/mplayer-rc.o stream/network.o stream/stream_cddb.o: version.h
+mpcommon.o vobsub.o gui/win32/gui.o libmpdemux/muxer_avi.o osdep/mplayer-rc.o stream/network.o stream/stream_cddb.o: version.h
 $(DEPS): help_mp.h
 
 libdvdcss/%.o libdvdcss/%.d: CFLAGS += -D__USE_UNIX98 -D_GNU_SOURCE -DVERSION=\"1.2.9\" $(CFLAGS_LIBDVDCSS)
diff --git a/av_opts.h b/av_opts.h
index a9eab9c8a5..640443a352 100644
--- a/av_opts.h
+++ b/av_opts.h
@@ -19,7 +19,12 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#ifndef MPLAYER_AV_OPTS_H
+#define MPLAYER_AV_OPTS_H
+
 /**
  * Parses str and sets AVOptions in v accordingly.
  */
 int parse_avopts(void *v, char *str);
+
+#endif /* MPLAYER_AV_OPTS_H */
diff --git a/configure b/configure
index 283915f84f..dbea3fbb30 100755
--- a/configure
+++ b/configure
@@ -2617,12 +2617,12 @@ EOF
   echores "$_iwmmxt"
 fi
 
-_cpuexts_all='ALTIVEC BSWAP MMX MMX2 3DNOW 3DNOWEX SSE SSE2 SSSE3 FAST_CMOV CMOV PLD ARMV5TE ARMV6 ARMV6T2 ARMVFP IWMMXT MMI VIS MVI'
+_cpuexts_all='ALTIVEC BSWAP MMX MMX2 3DNOW 3DNOWEXT SSE SSE2 SSSE3 FAST_CMOV CMOV PLD ARMV5TE ARMV6 ARMV6T2 ARMVFP IWMMXT MMI VIS MVI'
 test "$_altivec"   = yes && _cpuexts="ALTIVEC $_cpuexts"
 test "$_mmx"       = yes && _cpuexts="MMX $_cpuexts"
 test "$_mmxext"    = yes && _cpuexts="MMX2 $_cpuexts"
 test "$_3dnow"     = yes && _cpuexts="3DNOW $_cpuexts"
-test "$_3dnowext"  = yes && _cpuexts="3DNOWEX $_cpuexts"
+test "$_3dnowext"  = yes && _cpuexts="3DNOWEXT $_cpuexts"
 test "$_sse"       = yes && _cpuexts="SSE $_cpuexts"
 test "$_sse2"      = yes && _cpuexts="SSE2 $_cpuexts"
 test "$_ssse3"     = yes && _cpuexts="SSSE3 $_cpuexts"
diff --git a/cpudetect.c b/cpudetect.c
index 9756dfb454..fee0c8fc1f 100644
--- a/cpudetect.c
+++ b/cpudetect.c
@@ -232,7 +232,7 @@ void GetCpuCaps( CpuCaps *caps)
 	if(caps->has3DNow) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"3DNow supported but disabled\n");
 	caps->has3DNow=0;
 #endif
-#if !HAVE_3DNOWEX
+#if !HAVE_3DNOWEXT
 	if(caps->has3DNowExt) mp_msg(MSGT_CPUDETECT,MSGL_WARN,"3DNowExt supported but disabled\n");
 	caps->has3DNowExt=0;
 #endif
diff --git a/input/lirc.c b/input/lirc.c
index fec56f4828..4012d5b340 100644
--- a/input/lirc.c
+++ b/input/lirc.c
@@ -76,7 +76,7 @@ int mp_input_lirc_read(int fd,char* dest, int s) {
     return w;
   }
       
-  // Nothing in the buffer, pool the lirc fd
+  // Nothing in the buffer, poll the lirc fd
   FD_ZERO(&fds);
   FD_SET(fd,&fds);
   memset(&tv,0,sizeof(tv));
diff --git a/liba52/imdct.c b/liba52/imdct.c
index 9ad36249d6..c6253b672d 100644
--- a/liba52/imdct.c
+++ b/liba52/imdct.c
@@ -53,8 +53,8 @@
 void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
 
 #ifdef RUNTIME_CPUDETECT
-#undef HAVE_3DNOWEX
-#define HAVE_3DNOWEX 0
+#undef HAVE_3DNOWEXT
+#define HAVE_3DNOWEXT 0
 #endif
 
 typedef struct complex_s {
@@ -720,11 +720,11 @@ const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000
 const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; 
 const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
 
-#undef HAVE_3DNOWEX
-#define HAVE_3DNOWEX 0
+#undef HAVE_3DNOWEXT
+#define HAVE_3DNOWEXT 0
 #include "imdct_3dnow.h"
-#undef HAVE_3DNOWEX
-#define HAVE_3DNOWEX 1
+#undef HAVE_3DNOWEXT
+#define HAVE_3DNOWEXT 1
 #include "imdct_3dnow.h"
 
 void
diff --git a/liba52/imdct_3dnow.h b/liba52/imdct_3dnow.h
index eeab33b27c..048aa7baa7 100644
--- a/liba52/imdct_3dnow.h
+++ b/liba52/imdct_3dnow.h
@@ -26,7 +26,7 @@
 #undef FFT_ASMB16_3DNOW
 #undef FFT_128P_3DNOW
 
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 #define FFT_4_3DNOW fft_4_3dnowex
 #define FFT_8_3DNOW fft_8_3dnowex
 #define FFT_ASMB_3DNOW fft_asmb_3dnowex
@@ -52,7 +52,7 @@ static void FFT_4_3DNOW(complex_t *x)
 	"pxor   %3, %%mm1\n\t" /* -mm1.re | mm1.im */
 	"pfadd	%%mm1, %%mm3\n\t" /* vi.im = x[3].re - x[1].re; */
 	"movq	%%mm3, %%mm4\n\t" /* vi.re =-x[3].im + x[1].im; mm4 = vi */
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd %%mm4, %%mm4\n\t"
 #else
 	"punpckldq %%mm4, %%mm5\n\t"
@@ -129,7 +129,7 @@ static void FFT_8_3DNOW(complex_t *x)
       "movq	(%1),	%%mm1\n\t"
       "movq	16(%1),	%%mm4\n\t"
       "movq	%%mm1,	%%mm2\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
       "pswapd	%%mm3,	%%mm3\n\t"
 #else
       "punpckldq %%mm3,	%%mm6\n\t"
@@ -160,7 +160,7 @@ static void FFT_8_3DNOW(complex_t *x)
 	"movq	%2,	%%mm1\n\t"
 	"movq	56(%3),	%%mm3\n\t"
 	"pfsub	40(%3),	%%mm0\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd	%%mm1,	%%mm1\n\t"
 #else
 	"punpckldq %%mm1, %%mm2\n\t"
@@ -168,7 +168,7 @@ static void FFT_8_3DNOW(complex_t *x)
 #endif
 	"pxor	%%mm7,	%%mm1\n\t"
 	"pfadd	%%mm1,	%%mm0\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd	%%mm3,	%%mm3\n\t"
 #else
 	"punpckldq %%mm3, %%mm2\n\t"
@@ -182,7 +182,7 @@ static void FFT_8_3DNOW(complex_t *x)
 	"pfmul	%4,	%%mm0\n\t"
 	
 	"movq	40(%3),	%%mm5\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd	%%mm5,	%%mm5\n\t"
 #else
 	"punpckldq %%mm5, %%mm1\n\t"
@@ -205,7 +205,7 @@ static void FFT_8_3DNOW(complex_t *x)
   /* x[3] x[7] */
   __asm__ volatile(
 	"movq	%1,	%%mm0\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd	%3,	%%mm1\n\t"
 #else
 	"movq	%3,	%%mm1\n\t"
@@ -218,7 +218,7 @@ static void FFT_8_3DNOW(complex_t *x)
 	"movq	56(%4),	%%mm3\n\t"
 	"pxor	%%mm7,	%%mm3\n\t"
 	"pfadd	%%mm3,	%%mm2\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 	"pswapd	%%mm2,	%%mm2\n\t"
 #else
 	"punpckldq %%mm2, %%mm5\n\t"
@@ -331,7 +331,7 @@ static void FFT_128P_3DNOW(complex_t *a)
 }
 
 static void
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 imdct_do_512_3dnowex
 #else
 imdct_do_512_3dnow
@@ -371,14 +371,14 @@ imdct_do_512_3dnow
 		"punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */
 		"movq	%%mm0, %%mm2\n\t"
 		"pfmul	%%mm1, %%mm0\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 		"pswapd	%%mm1, %%mm1\n\t"
 #else
 		"punpckldq %%mm1, %%mm5\n\t"
 		"punpckhdq %%mm5, %%mm1\n\t"
 #endif
 		"pfmul	%%mm1, %%mm2\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 		"pfpnacc %%mm2, %%mm0\n\t"
 #else
 		"pxor	%%mm7, %%mm0\n\t"
@@ -445,7 +445,7 @@ imdct_do_512_3dnow
 	    __asm__ volatile (
 		"movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
 		"movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
-#if !HAVE_3DNOWEX
+#if !HAVE_3DNOWEXT
 		"punpckldq %%mm1, %%mm2\n\t"
 		"punpckhdq %%mm2, %%mm1\n\t"
 #else			 
@@ -455,7 +455,7 @@ imdct_do_512_3dnow
 		"punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
 		"pfmul %%mm3, %%mm0\n\t"
 		"pfmul %%mm3, %%mm1\n\t"
-#if !HAVE_3DNOWEX
+#if !HAVE_3DNOWEXT
 		"pxor  %%mm7, %%mm0\n\t"
 		"pfacc %%mm1, %%mm0\n\t"
 		"punpckldq %%mm0, %%mm1\n\t"
@@ -543,7 +543,7 @@ imdct_do_512_3dnow
 		"movd	(%1), %%mm1\n\t"
 		"punpckldq (%2), %%mm0\n\t"
 		"punpckldq 508(%2), %%mm1\n\t"
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 		"pswapd	(%3), %%mm3\n\t"
 		"pswapd	-512(%3), %%mm4\n\t"
 #else
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff
index e5008a77cc..9840a05f02 100644
--- a/liba52/liba52_changes.diff
+++ b/liba52/liba52_changes.diff
@@ -1412,7 +1412,7 @@
 +void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
 +
 +#ifdef RUNTIME_CPUDETECT
-+#undef HAVE_3DNOWEX
++#undef HAVE_3DNOWEXT
 +#endif
  
  typedef struct complex_s {
@@ -1862,9 +1862,9 @@
 +const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }}; 
 +const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
 +
-+#undef HAVE_3DNOWEX
++#undef HAVE_3DNOWEXT
 +#include "imdct_3dnow.h"
-+#define HAVE_3DNOWEX
++#define HAVE_3DNOWEXT
 +#include "imdct_3dnow.h"
 +
 +void
diff --git a/liba52/srfftp_3dnow.h b/liba52/srfftp_3dnow.h
index 1d66c5b89c..7d05d9e09f 100644
--- a/liba52/srfftp_3dnow.h
+++ b/liba52/srfftp_3dnow.h
@@ -46,7 +46,7 @@ typedef struct
 	"m"(x_minus_plus_3dnow)\
 	:"memory");
 
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 #define PSWAP_MM(mm_base,mm_hlp) "pswapd	"mm_base","mm_base"\n\t"
 #else
 #define PSWAP_MM(mm_base,mm_hlp)\
@@ -54,7 +54,7 @@ typedef struct
 	"psrlq $32, "mm_base"\n\t"\
 	"punpckldq "mm_hlp","mm_base"\n\t"
 #endif
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
 #define PFNACC_MM(mm_base,mm_hlp)	"pfnacc	"mm_base","mm_base"\n\t"
 #else
 #define PFNACC_MM(mm_base,mm_hlp)\
diff --git a/libaf/af_resample.c b/libaf/af_resample.c
index 0953217c4c..2536c0706d 100644
--- a/libaf/af_resample.c
+++ b/libaf/af_resample.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 
 #include "libavutil/common.h"
+#include "libavutil/mathematics.h"
 #include "af.h"
 #include "dsp.h"
 
diff --git a/libmpcodecs/vf_fspp.c b/libmpcodecs/vf_fspp.c
index 7c42819d20..5ba6188d15 100644
--- a/libmpcodecs/vf_fspp.c
+++ b/libmpcodecs/vf_fspp.c
@@ -43,6 +43,7 @@
 #include "mp_msg.h"
 #include "cpudetect.h"
 
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mem.h"
 #include "libavcodec/avcodec.h"
@@ -57,6 +58,9 @@
 #include "vf.h"
 #include "libvo/fastmemcpy.h"
 
+#undef free
+#undef malloc
+
 //===========================================================================//
 #define BLOCKSZ 12
 
diff --git a/libmpcodecs/vf_mcdeint.c b/libmpcodecs/vf_mcdeint.c
index f1b816d8ba..6983aa5163 100644
--- a/libmpcodecs/vf_mcdeint.c
+++ b/libmpcodecs/vf_mcdeint.c
@@ -56,10 +56,15 @@ Known Issues:
 #include "mp_msg.h"
 #include "cpudetect.h"
 
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/dsputil.h"
 
+#undef fprintf
+#undef free
+#undef malloc
+
 #ifdef HAVE_MALLOC_H
 #include <malloc.h>
 #endif
diff --git a/libmpcodecs/vf_pp.c b/libmpcodecs/vf_pp.c
index 219fe920f1..c910595c4a 100644
--- a/libmpcodecs/vf_pp.c
+++ b/libmpcodecs/vf_pp.c
@@ -15,6 +15,7 @@
 #include "img_format.h"
 #include "mp_image.h"
 #include "vf.h"
+#include "libavutil/internal.h"
 #include "libpostproc/postprocess.h"
 
 #ifdef CONFIG_LIBPOSTPROC_A
@@ -22,6 +23,8 @@
 #include "libpostproc/postprocess_internal.h"
 #endif
 
+#undef malloc
+
 struct vf_priv_s {
     int pp;
     pp_mode_t *ppMode[PP_QUALITY_MAX+1];
diff --git a/libmpcodecs/vf_spp.c b/libmpcodecs/vf_spp.c
index 50969c13fb..aa18534966 100644
--- a/libmpcodecs/vf_spp.c
+++ b/libmpcodecs/vf_spp.c
@@ -37,10 +37,15 @@
 #include "mp_msg.h"
 #include "cpudetect.h"
 
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/dsputil.h"
 
+#undef fprintf
+#undef free
+#undef malloc
+
 #ifdef HAVE_MALLOC_H
 #include <malloc.h>
 #endif
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 5b79fea761..99be2a4f27 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -139,11 +139,11 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 //plain C versions
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #undef HAVE_SSE2
 #define HAVE_MMX 0
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 0
+#define HAVE_AMD3DNOW 0
 #define HAVE_SSE2 0
 #define RENAME(a) a ## _C
 #include "rgb2rgb_template.c"
@@ -167,9 +167,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 //3DNOW versions
 #undef RENAME
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 1
+#define HAVE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNOW
 #include "rgb2rgb_template.c"
 
@@ -183,7 +183,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask)  = 0x0000001f0000001fULL;
 */
 
 void sws_rgb2rgb_init(int flags){
-#if (HAVE_MMX2 || HAVE_3DNOW || HAVE_MMX)  && CONFIG_GPL
+#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX)  && CONFIG_GPL
     if (flags & SWS_CPU_CAPS_MMX2)
         rgb2rgb_init_MMX2();
     else if (flags & SWS_CPU_CAPS_3DNOW)
@@ -191,7 +191,7 @@ void sws_rgb2rgb_init(int flags){
     else if (flags & SWS_CPU_CAPS_MMX)
         rgb2rgb_init_MMX();
     else
-#endif /* HAVE_MMX2 || HAVE_3DNOW || HAVE_MMX */
+#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
         rgb2rgb_init_C();
 }
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index b988ca0465..f8915e8df2 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -43,7 +43,7 @@
 #define MMREG_SIZE 8
 #endif
 
-#if HAVE_3DNOW
+#if HAVE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PREFETCHW "prefetchw"
 #define PAVGB     "pavgusb"
@@ -56,7 +56,7 @@
 #define PREFETCHW " # nop"
 #endif
 
-#if HAVE_3DNOW
+#if HAVE_AMD3DNOW
 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
 #define EMMS     "femms"
 #else
@@ -1914,7 +1914,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
         dst+= dstStride;
 
     for (y=1; y<srcHeight; y++){
-#if HAVE_MMX2 || HAVE_3DNOW
+#if HAVE_MMX2 || HAVE_AMD3DNOW
         const long mmxSize= srcWidth&~15;
         __asm__ volatile(
         "mov           %4, %%"REG_a"            \n\t"
@@ -2229,7 +2229,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "1:                                         \n\t"
         PREFETCH"    64(%0, %%"REG_d")              \n\t"
         PREFETCH"    64(%1, %%"REG_d")              \n\t"
-#if HAVE_MMX2 || HAVE_3DNOW
+#if HAVE_MMX2 || HAVE_AMD3DNOW
         "movq          (%0, %%"REG_d"), %%mm0       \n\t"
         "movq          (%1, %%"REG_d"), %%mm1       \n\t"
         "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
@@ -2290,7 +2290,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
         "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
         "psraw                      $7, %%mm0       \n\t"
 
-#if HAVE_MMX2 || HAVE_3DNOW
+#if HAVE_MMX2 || HAVE_AMD3DNOW
         "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
         "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
         "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index a22bf64b71..88ac39757e 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -46,7 +46,7 @@ tested special converters (most are tested actually, but I did not write it down
  YVU9 -> YV12
 
 untested special converters
-  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
+  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
   YV12/I420 -> YV12/I420
   YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
   BGR24 -> BGR32 & RGB24 -> RGB32
@@ -83,15 +83,15 @@ unsigned swscale_version(void)
 #undef PAVGB
 
 //#undef HAVE_MMX2
-//#define HAVE_3DNOW
+//#define HAVE_AMD3DNOW
 //#undef HAVE_MMX
 //#undef ARCH_X86
 //#define WORDS_BIGENDIAN
 #define DITHER1XBPP
 
-#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
+#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
 
-#define RET 0xC3 //near return opcode for X86
+#define RET 0xC3 //near return opcode for x86
 
 #ifdef M_PI
 #define PI M_PI
@@ -194,10 +194,10 @@ Special versions: fast Y 1:1 scaling (no interpolation in y direction)
 TODO
 more intelligent misalignment avoidance for the horizontal scaler
 write special vertical cubic upscale version
-Optimize C code (yv12 / minmax)
-add support for packed pixel yuv input & output
+optimize C code (YV12 / minmax)
+add support for packed pixel YUV input & output
 add support for Y8 output
-optimize bgr24 & bgr32
+optimize BGR24 & BGR32
 add BGR4 output support
 write special BGR->BGR scaler
 */
@@ -257,7 +257,7 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
 
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
 
-#endif /* ARCH_X86 */
+#endif /* ARCH_X86 && CONFIG_GPL */
 
 // clipping helper table for C implementations:
 static unsigned char clip_table[768];
@@ -471,7 +471,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
 {
-    //FIXME Optimize (just quickly writen not opti..)
+    //FIXME Optimize (just quickly written, not optimized)
     int i;
     for (i=0; i<dstW; i++)
     {
@@ -504,7 +504,7 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil
                                 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
 {
-    //FIXME Optimize (just quickly writen not opti..)
+    //FIXME Optimize (just quickly written, not optimized)
     int i;
     for (i=0; i<dstW; i++)
     {
@@ -653,7 +653,7 @@ static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFil
         }
 
 #define YSCALE_YUV_2_RGBX_C(type) \
-    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that cliping is not needed and then use _NOCLIP*/\
+    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
     r = (type *)c->table_rV[V];   \
     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
     b = (type *)c->table_bU[U];   \
@@ -953,7 +953,7 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
     }
 }
 
-//Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
+//Note: we have C, x86, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one
 //Plain C versions
 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
 #define COMPILE_C
@@ -961,13 +961,14 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 
 #if ARCH_PPC
 #if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#undef COMPILE_C
 #define COMPILE_ALTIVEC
-#endif //HAVE_ALTIVEC
+#endif
 #endif //ARCH_PPC
 
 #if ARCH_X86
 
-#if ((HAVE_MMX && !HAVE_3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
 #define COMPILE_MMX
 #endif
 
@@ -975,18 +976,18 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 #define COMPILE_MMX2
 #endif
 
-#if ((HAVE_3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
+#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
 #define COMPILE_3DNOW
 #endif
-#endif //ARCH_X86 || ARCH_X86_64
+#endif //ARCH_X86
 
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #undef HAVE_ALTIVEC
 #define HAVE_MMX 0
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 0
+#define HAVE_AMD3DNOW 0
 #define HAVE_ALTIVEC 0
 
 #ifdef COMPILE_C
@@ -1004,12 +1005,12 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 
 #if ARCH_X86
 
-//X86 versions
+//x86 versions
 /*
 #undef RENAME
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define ARCH_X86
 #define RENAME(a) a ## _X86
 #include "swscale_template.c"
@@ -1019,10 +1020,10 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 #undef RENAME
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define HAVE_MMX 1
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 0
+#define HAVE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX
 #include "swscale_template.c"
 #endif
@@ -1032,10 +1033,10 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 #undef RENAME
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define HAVE_MMX 1
 #define HAVE_MMX2 1
-#define HAVE_3DNOW 0
+#define HAVE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX2
 #include "swscale_template.c"
 #endif
@@ -1045,17 +1046,17 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
 #undef RENAME
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define HAVE_MMX 1
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 1
+#define HAVE_AMD3DNOW 1
 #define RENAME(a) a ## _3DNow
 #include "swscale_template.c"
 #endif
 
-#endif //ARCH_X86 || ARCH_X86_64
+#endif //ARCH_X86
 
-// minor note: the HAVE_xyz is messed up after that line so don't use it
+// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
 
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
@@ -1085,7 +1086,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
         __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
 #endif
 
-    // Note the +1 is for the MMXscaler which reads over the end
+    // NOTE: the +1 is for the MMX scaler which reads over the end
     *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
 
     if (FFABS(xInc - 0x10000) <10) // unscaled
@@ -1134,7 +1135,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
             int j;
 
             (*filterPos)[i]= xx;
-                //Bilinear upscale / linear interpolate / Area averaging
+                //bilinear upscale / linear interpolate / area averaging
                 for (j=0; j<filterSize; j++)
                 {
                     int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
@@ -1315,7 +1316,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
             /* preserve monotonicity because the core can't handle the filter otherwise */
             if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
 
-            // Move filter coeffs left
+            // move filter coefficients left
             for (k=1; k<filter2Size; k++)
                 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
             filter2[i*filter2Size + k - 1]= 0;
@@ -1341,10 +1342,10 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
         if (minFilterSize < 5)
             filterAlign = 4;
 
-        // we really don't want to waste our time
-        // doing useless computation, so fall-back on
-        // the scalar C code for very small filter.
-        // vectorizing is worth it only if you have
+        // We really don't want to waste our time
+        // doing useless computation, so fall back on
+        // the scalar C code for very small filters.
+        // Vectorizing is worth it only if you have a
         // decent-sized vector.
         if (minFilterSize < 3)
             filterAlign = 1;
@@ -1381,7 +1382,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
     }
 
 
-    //FIXME try to align filterpos if possible
+    //FIXME try to align filterPos if possible
 
     //fix borders
     for (i=0; i<dstW; i++)
@@ -1389,7 +1390,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
         int j;
         if ((*filterPos)[i] < 0)
         {
-            // Move filter coeffs left to compensate for filterPos
+            // move filter coefficients left to compensate for filterPos
             for (j=1; j<filterSize; j++)
             {
                 int left= FFMAX(j + (*filterPos)[i], 0);
@@ -1402,7 +1403,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
         if ((*filterPos)[i] + filterSize > srcW)
         {
             int shift= (*filterPos)[i] + filterSize - srcW;
-            // Move filter coeffs right to compensate for filterPos
+            // move filter coefficients right to compensate for filterPos
             for (j=filterSize-2; j>=0; j--)
             {
                 int right= FFMIN(j + shift, filterSize-1);
@@ -1413,11 +1414,11 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
         }
     }
 
-    // Note the +1 is for the MMXscaler which reads over the end
+    // NOTE: the +1 is for the MMX scaler which reads over the end
     /* align at 16 for AltiVec (needed by hScale_altivec_real) */
     *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
 
-    /* Normalize & Store in outFilter */
+    /* normalize & store in outFilter */
     for (i=0; i<dstW; i++)
     {
         int j;
@@ -1660,7 +1661,7 @@ static SwsFunc getSwsFunc(int flags){
 #else //RUNTIME_CPUDETECT
 #if   HAVE_MMX2
     return swScale_MMX2;
-#elif HAVE_3DNOW
+#elif HAVE_AMD3DNOW
     return swScale_3DNow;
 #elif HAVE_MMX
     return swScale_MMX;
@@ -2075,7 +2076,7 @@ static uint16_t roundToInt16(int64_t f){
 }
 
 /**
- * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
+ * @param inv_table the yuv2rgb coefficients, normally Inverse_Table_6_9[x]
  * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
  * @return -1 if not supported
  */
@@ -2197,7 +2198,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
     flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
 #if   HAVE_MMX2
     flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
-#elif HAVE_3DNOW
+#elif HAVE_AMD3DNOW
     flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
 #elif HAVE_MMX
     flags |= SWS_CPU_CAPS_MMX;
@@ -2242,7 +2243,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
                 |SWS_BICUBLIN);
     if(!i || (i & (i-1)))
     {
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be choosen\n");
+        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
         return NULL;
     }
 
@@ -2254,7 +2255,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         return NULL;
     }
     if(srcW > VOFW || dstW > VOFW){
-        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile time max width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
+        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
         return NULL;
     }
 
@@ -2288,14 +2289,14 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
     getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
     getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
 
-    // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
+    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
     if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
 
     // drop some chroma lines if the user wants it
     c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
     c->chrSrcVSubSample+= c->vChrDrop;
 
-    // drop every 2. pixel for chroma calculation unless user wants full chroma
+    // drop every other pixel for chroma calculation unless user wants full chroma
     if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
       && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
       && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
@@ -2322,7 +2323,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
 
     sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
 
-    /* unscaled special Cases */
+    /* unscaled special cases */
     if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
     {
         /* yv12_to_nv12 */
@@ -2348,7 +2349,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
             c->swScale= bgr24toyv12Wrapper;
 
-        /* rgb/bgr -> rgb/bgr (no dither needed forms) */
+        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
         if (  (isBGR(srcFormat) || isRGB(srcFormat))
            && (isBGR(dstFormat) || isRGB(dstFormat))
            && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
@@ -2448,7 +2449,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
         {
             if (flags&SWS_PRINT_INFO)
-                av_log(c, AV_LOG_INFO, "output Width is not a multiple of 32 -> no MMX2 scaler\n");
+                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
         }
         if (usesHFilter) c->canMMX2BeUsed=0;
     }
@@ -2471,7 +2472,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
             c->lumXInc+= 20;
             c->chrXInc+= 20;
         }
-        //we don't use the x86asm scaler if mmx is available
+        //we don't use the x86 asm scaler if MMX is available
         else if (flags & SWS_CPU_CAPS_MMX)
         {
             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
@@ -2517,7 +2518,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
             initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
         }
 #endif /* defined(COMPILE_MMX2) */
-    } // Init Horizontal stuff
+    } // initialize horizontal stuff
 
 
 
@@ -2557,7 +2558,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
 #endif
     }
 
-    // Calculate Buffer Sizes so that they won't run out while handling these damn slices
+    // calculate buffer sizes so that they won't run out while handling these damn slices
     c->vLumBufSize= c->vLumFilterSize;
     c->vChrBufSize= c->vChrFilterSize;
     for (i=0; i<dstH; i++)
@@ -2577,7 +2578,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
     // allocate pixbufs (we use dynamic allocation because otherwise we would need to
     c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
     c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
-    //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
+    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wants to replace the 4000/8000)
     /* align at 16 bytes for AltiVec */
     for (i=0; i<c->vLumBufSize; i++)
         c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
@@ -2668,7 +2669,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         else
         {
 #if ARCH_X86
-            av_log(c, AV_LOG_VERBOSE, "using X86-Asm scaler for horizontal scaling\n");
+            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
 #else
             if (flags & SWS_FAST_BILINEAR)
                 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
@@ -2695,22 +2696,22 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
         }
 
         if (dstFormat==PIX_FMT_BGR24)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 Converter\n",
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
                    (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
         else if (dstFormat==PIX_FMT_RGB32)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR565)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
         else if (dstFormat==PIX_FMT_BGR555)
-            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
+            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
 
         av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
     }
     if (flags & SWS_PRINT_INFO)
     {
-        av_log(c, AV_LOG_DEBUG, "Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+        av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
-        av_log(c, AV_LOG_DEBUG, "Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+        av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
     }
 
@@ -2720,7 +2721,7 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
 
 /**
  * swscale wrapper, so we don't need to export the SwsContext.
- * assumes planar YUV to be in YUV order instead of YVU
+ * Assumes planar YUV to be in YUV order instead of YVU.
  */
 int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
               int srcSliceH, uint8_t* dst[], int dstStride[]){
@@ -2820,7 +2821,7 @@ int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
 }
 
 /**
- * swscale wrapper, so we don't need to export the SwsContext
+ * swscale wrapper, so we don't need to export the SwsContext.
  */
 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                       int srcSliceH, uint8_t* dst[], int dstStride[]){
@@ -2886,8 +2887,8 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
 }
 
 /**
- * returns a normalized gaussian curve used to filter stuff
- * quality=3 is high quality, lowwer is lowwer quality
+ * Returns a normalized Gaussian curve used to filter stuff.
+ * quality=3 is high quality, lower is lower quality.
  */
 SwsVector *sws_getGaussianVec(double variance, double quality){
     const int length= (int)(variance*quality + 0.5) | 1;
@@ -3154,7 +3155,7 @@ void sws_freeContext(SwsContext *c){
 #endif
     c->funnyYCode=NULL;
     c->funnyUVCode=NULL;
-#endif /* ARCH_X86 */
+#endif /* ARCH_X86 && CONFIG_GPL */
 
     av_freep(&c->lumMmx2Filter);
     av_freep(&c->chrMmx2Filter);
@@ -3168,7 +3169,7 @@ void sws_freeContext(SwsContext *c){
 /**
  * Checks if context is valid or reallocs a new one instead.
  * If context is NULL, just calls sws_getContext() to get a new one.
- * Otherwise, checks if the parameters are the same already saved in context.
+ * Otherwise, checks if the parameters are the ones already saved in context.
  * If that is the case, returns the current context.
  * Otherwise, frees context and gets a new one.
  *
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index b2505c0f7d..38db01023a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -52,6 +52,8 @@
 #   define APCK_SIZE 16
 #endif
 
+struct SwsContext;
+
 typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
              int srcSliceH, uint8_t* dst[], int dstStride[]);
 
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 2144b5a57c..c0e680ca4a 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -29,14 +29,14 @@
 #undef EMMS
 #undef SFENCE
 
-#if HAVE_3DNOW
+#if HAVE_AMD3DNOW
 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
 #define EMMS     "femms"
 #else
 #define EMMS     "emms"
 #endif
 
-#if HAVE_3DNOW
+#if HAVE_AMD3DNOW
 #define PREFETCH  "prefetch"
 #define PREFETCHW "prefetchw"
 #elif HAVE_MMX2
@@ -55,7 +55,7 @@
 
 #if HAVE_MMX2
 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_3DNOW
+#elif HAVE_AMD3DNOW
 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
 #endif
 
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 9dedd2a701..5aaa5728ea 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -55,9 +55,9 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
 //MMX versions
 #undef RENAME
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define HAVE_MMX2 0
-#define HAVE_3DNOW 0
+#define HAVE_AMD3DNOW 0
 #define RENAME(a) a ## _MMX
 #include "yuv2rgb_template.c"
 
diff --git a/libswscale/yuv2rgb_template.c b/libswscale/yuv2rgb_template.c
index c66f24edeb..a850545a48 100644
--- a/libswscale/yuv2rgb_template.c
+++ b/libswscale/yuv2rgb_template.c
@@ -30,7 +30,7 @@
 #undef EMMS
 #undef SFENCE
 
-#if HAVE_3DNOW
+#if HAVE_AMD3DNOW
 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
 #define EMMS     "femms"
 #else
diff --git a/mencoder.c b/mencoder.c
index 11b40b49f1..413d5b39a2 100644
--- a/mencoder.c
+++ b/mencoder.c
@@ -36,13 +36,9 @@
 
 #include <sys/time.h>
 
-
-#include "version.h"
 #include "mp_msg.h"
 #include "help_mp.h"
 
-#include "cpudetect.h"
-
 #include "codec-cfg.h"
 #include "m_option.h"
 #include "m_config.h"
@@ -423,40 +419,7 @@ audio_encoder_t *aencoder = NULL;
   // Preparse the command line
   m_config_preparse_command_line(mconfig,argc,argv);
 
-  mp_msg(MSGT_CPLAYER,MSGL_INFO, "MEncoder " VERSION " (C) 2000-2009 MPlayer Team\n");
-
-  /* Test for cpu capabilities (and corresponding OS support) for optimizing */
-  GetCpuCaps(&gCpuCaps);
-#if ARCH_X86
-  mp_msg(MSGT_CPLAYER,MSGL_INFO,"CPUflags: Type: %d MMX: %d MMX2: %d 3DNow: %d 3DNow2: %d SSE: %d SSE2: %d\n",
-      gCpuCaps.cpuType,gCpuCaps.hasMMX,gCpuCaps.hasMMX2,
-      gCpuCaps.has3DNow, gCpuCaps.has3DNowExt,
-      gCpuCaps.hasSSE, gCpuCaps.hasSSE2);
-#ifdef RUNTIME_CPUDETECT
-  mp_msg(MSGT_CPLAYER,MSGL_INFO, MSGTR_CompiledWithRuntimeDetection);
-#else
-  mp_msg(MSGT_CPLAYER,MSGL_INFO, MSGTR_CompiledWithCPUExtensions);
-#if HAVE_MMX
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," MMX");
-#endif
-#if HAVE_MMX2
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," MMX2");
-#endif
-#if HAVE_3DNOW
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," 3DNow");
-#endif
-#if HAVE_3DNOWEX
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," 3DNowEx");
-#endif
-#if HAVE_SSE
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," SSE");
-#endif
-#if HAVE_SSE2
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," SSE2");
-#endif
-  mp_msg(MSGT_CPLAYER,MSGL_INFO,"\n\n");
-#endif
-#endif
+  print_version("MEncoder");
 
 #if (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(CONFIG_WIN32DLL)
   set_path_env();
diff --git a/mp3lib/dct64_sse.c b/mp3lib/dct64_sse.c
index 34650d01ba..9381b2adf7 100644
--- a/mp3lib/dct64_sse.c
+++ b/mp3lib/dct64_sse.c
@@ -5,7 +5,7 @@
  * and mp3lib/dct64_mmx.c
  */
 
-#include <libavutil/mem.h>
+#include "libavutil/internal.h"
 
 typedef float real;
 
diff --git a/mp3lib/sr1.c b/mp3lib/sr1.c
index 31a4a99a49..6606ed3429 100644
--- a/mp3lib/sr1.c
+++ b/mp3lib/sr1.c
@@ -20,6 +20,7 @@
 #include "huffman.h"
 #include "mp3.h"
 #include "libavutil/common.h"
+#include "libavutil/internal.h"
 #include "mpbswap.h"
 #include "cpudetect.h"
 //#include "liba52/mm_accel.h"
@@ -27,12 +28,15 @@
 
 #include "libvo/fastmemcpy.h"
 
+#undef fprintf
+#undef printf
+
 #if ARCH_X86_64
 // 3DNow! and 3DNow!Ext routines don't compile under AMD64
 #undef HAVE_3DNOW
-#undef HAVE_3DNOWEX
+#undef HAVE_3DNOWEXT
 #define HAVE_3DNOW 0
-#define HAVE_3DNOWEX 0
+#define HAVE_3DNOWEXT 0
 #endif
 
 //static FILE* mp3_file=NULL;
@@ -423,7 +427,7 @@ void MP3_Init(void){
     }
 #endif
 
-#if HAVE_3DNOWEX
+#if HAVE_3DNOWEXT
     if (gCpuCaps.has3DNowExt)
     {
 	dct36_func=dct36_3dnowex;
diff --git a/mpcommon.c b/mpcommon.c
index d07e4eb27c..4c6c780a7b 100644
--- a/mpcommon.c
+++ b/mpcommon.c
@@ -7,7 +7,11 @@
 #include "mplayer.h"
 #include "libvo/sub.h"
 #include "libvo/video_out.h"
+#include "cpudetect.h"
+#include "help_mp.h"
+#include "mp_msg.h"
 #include "spudec.h"
+#include "version.h"
 #include "vobsub.h"
 #ifdef CONFIG_TV_TELETEXT
 #include "stream/tv.h"
@@ -26,6 +30,53 @@ ass_track_t* ass_track = 0; // current track to render
 sub_data* subdata = NULL;
 subtitle* vo_sub_last = NULL;
 
+
+void print_version(const char* name)
+{
+    mp_msg(MSGT_CPLAYER, MSGL_INFO, MP_TITLE, name);
+
+    /* Test for CPU capabilities (and corresponding OS support) for optimizing */
+    GetCpuCaps(&gCpuCaps);
+#if ARCH_X86
+    mp_msg(MSGT_CPLAYER, MSGL_V,
+	   "CPUflags:  MMX: %d MMX2: %d 3DNow: %d 3DNowExt: %d SSE: %d SSE2: %d SSSE3: %d\n",
+	   gCpuCaps.hasMMX, gCpuCaps.hasMMX2,
+	   gCpuCaps.has3DNow, gCpuCaps.has3DNowExt,
+	   gCpuCaps.hasSSE, gCpuCaps.hasSSE2, gCpuCaps.hasSSSE3);
+#ifdef RUNTIME_CPUDETECT
+    mp_msg(MSGT_CPLAYER,MSGL_V, MSGTR_CompiledWithRuntimeDetection);
+#else
+    mp_msg(MSGT_CPLAYER,MSGL_V, MSGTR_CompiledWithCPUExtensions);
+#if HAVE_MMX
+    mp_msg(MSGT_CPLAYER,MSGL_V," MMX");
+#endif
+#if HAVE_MMX2
+    mp_msg(MSGT_CPLAYER,MSGL_V," MMX2");
+#endif
+#if HAVE_3DNOW
+    mp_msg(MSGT_CPLAYER,MSGL_V," 3DNow");
+#endif
+#if HAVE_3DNOWEXT
+    mp_msg(MSGT_CPLAYER,MSGL_V," 3DNowExt");
+#endif
+#if HAVE_SSE
+    mp_msg(MSGT_CPLAYER,MSGL_V," SSE");
+#endif
+#if HAVE_SSE2
+    mp_msg(MSGT_CPLAYER,MSGL_V," SSE2");
+#endif
+#if HAVE_SSSE3
+    mp_msg(MSGT_CPLAYER,MSGL_V," SSSE3");
+#endif
+#if HAVE_CMOV
+    mp_msg(MSGT_CPLAYER,MSGL_V," CMOV");
+#endif
+    mp_msg(MSGT_CPLAYER,MSGL_V,"\n");
+#endif /* RUNTIME_CPUDETECT */
+#endif /* ARCH_X86 */
+}
+
+
 void update_subtitles(sh_video_t *sh_video, demux_stream_t *d_dvdsub, int reset)
 {
     struct MPOpts *opts = sh_video->opts;
diff --git a/mpcommon.h b/mpcommon.h
index bdee63d898..a0c6fb8ff8 100644
--- a/mpcommon.h
+++ b/mpcommon.h
@@ -8,6 +8,8 @@
 extern double sub_last_pts;
 extern struct ass_track_s *ass_track;
 extern subtitle *vo_sub_last;
+
+void print_version(const char* name);
 void update_subtitles(sh_video_t *sh_video, demux_stream_t *d_dvdsub, int reset);
 void update_teletext(sh_video_t *sh_video, demuxer_t *demuxer, int reset);
 int select_audio(demuxer_t* demuxer, int audio_id, char* audio_lang);
diff --git a/mplayer.c b/mplayer.c
index e5594f8a7d..57edf9ee91 100644
--- a/mplayer.c
+++ b/mplayer.c
@@ -38,8 +38,6 @@
 
 #include <errno.h>
 
-#include "version.h"
-
 #include "mp_msg.h"
 
 #define HELP_MP_DEFINE_STATIC
@@ -79,8 +77,6 @@
 #include "osdep/getch2.h"
 #include "osdep/timer.h"
 
-#include "cpudetect.h"
-
 #ifdef CONFIG_GUI
 #include "gui/interface.h"
 #endif
@@ -2407,43 +2403,6 @@ static void pause_loop(struct MPContext *mpctx)
 #endif
 }
 
-static void print_version(void){
-  mp_msg(MSGT_CPLAYER, MSGL_INFO, "%s\n", MP_TITLE);
-
-/* Test for CPU capabilities (and corresponding OS support) for optimizing */
-  GetCpuCaps(&gCpuCaps);
-#if ARCH_X86
-  mp_msg(MSGT_CPLAYER,MSGL_INFO,"CPUflags:  MMX: %d MMX2: %d 3DNow: %d 3DNow2: %d SSE: %d SSE2: %d\n",
-      gCpuCaps.hasMMX,gCpuCaps.hasMMX2,
-      gCpuCaps.has3DNow, gCpuCaps.has3DNowExt,
-      gCpuCaps.hasSSE, gCpuCaps.hasSSE2);
-#ifdef RUNTIME_CPUDETECT
-  mp_msg(MSGT_CPLAYER,MSGL_INFO, MSGTR_CompiledWithRuntimeDetection);
-#else
-  mp_msg(MSGT_CPLAYER,MSGL_INFO, MSGTR_CompiledWithCPUExtensions);
-#if HAVE_MMX
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," MMX");
-#endif
-#if HAVE_MMX2
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," MMX2");
-#endif
-#if HAVE_3DNOW
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," 3DNow");
-#endif
-#if HAVE_3DNOWEX
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," 3DNowEx");
-#endif
-#if HAVE_SSE
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," SSE");
-#endif
-#if HAVE_SSE2
-  mp_msg(MSGT_CPLAYER,MSGL_INFO," SSE2");
-#endif
-  mp_msg(MSGT_CPLAYER,MSGL_INFO,"\n");
-#endif /* RUNTIME_CPUDETECT */
-#endif /* ARCH_X86 */
-}
-
 
 // Find the right mute status and record position for new file position
 static void edl_seek_reset(MPContext *mpctx)
@@ -2605,7 +2564,7 @@ int gui_no_filename=0;
   // Preparse the command line
   m_config_preparse_command_line(mpctx->mconfig,argc,argv);
 
-  print_version();
+  print_version("MPlayer");
 #if (defined(__MINGW32__) || defined(__CYGWIN__)) && defined(CONFIG_WIN32DLL)
   set_path_env();
 #endif
diff --git a/playtree.c b/playtree.c
index c499e8d1d7..1124d2f023 100644
--- a/playtree.c
+++ b/playtree.c
@@ -21,8 +21,10 @@ play_tree_is_valid(play_tree_t* pt);
 play_tree_t*
 play_tree_new(void) {
   play_tree_t* r = calloc(1,sizeof(play_tree_t));
-  if(r == NULL)
+  if(r == NULL) {
     mp_msg(MSGT_PLAYTREE,MSGL_ERR,"Can't allocate %d bytes of memory\n",(int)sizeof(play_tree_t));
+    return NULL;
+  }
   r->entry_type = PLAY_TREE_ENTRY_NODE;
   return r;
 }
diff --git a/playtreeparser.c b/playtreeparser.c
index 9e83cb4161..9d34cc737b 100644
--- a/playtreeparser.c
+++ b/playtreeparser.c
@@ -453,6 +453,8 @@ parse_smil(play_tree_parser_t* p) {
       continue;
     if (strncasecmp(line,"<?xml",5)==0) // smil in xml
       continue;
+    if (strncasecmp(line,"<!DOCTYPE smil",14)==0) // smil doctype declaration
+      continue;
     if (strncasecmp(line,"<smil",5)==0 || strncasecmp(line,"<?wpl",5)==0 ||
       strncasecmp(line,"(smil-document",14)==0)
       break; // smil header found
diff --git a/stream/freesdp/parser.c b/stream/freesdp/parser.c
index 33bc1d59b5..c17041b7ba 100644
--- a/stream/freesdp/parser.c
+++ b/stream/freesdp/parser.c
@@ -1644,7 +1644,8 @@ const char *
 fsdp_get_media_format (const fsdp_media_description_t * dsc,
 		       unsigned int index)
 {
-  if (!dsc && (index < dsc->formats_count))
+  /* No description, or index past the last format: nothing to return. */
+  if (!dsc || (index >= dsc->formats_count))
     return NULL;
   return dsc->formats[index];
 }
diff --git a/version.sh b/version.sh
index c9e5dd3b73..50a757c333 100755
--- a/version.sh
+++ b/version.sh
@@ -9,7 +9,7 @@ test $svn_revision || svn_revision=UNKNOWN
 
 NEW_REVISION="#define VERSION \"dev-SVN-r${svn_revision}${extra}\""
 OLD_REVISION=`cat version.h 2> /dev/null`
-TITLE="#define MP_TITLE \"MPlayer dev-SVN-r${svn_revision}${extra} (C) 2000-2009 MPlayer Team\""
+TITLE="#define MP_TITLE \"%s dev-SVN-r${svn_revision}${extra} (C) 2000-2009 MPlayer Team\\\n\""
 
 # Update version.h only on revision changes to avoid spurious rebuilds
 if test "$NEW_REVISION" != "$OLD_REVISION"; then