diff --git a/libswscale/options.c b/libswscale/options.c
index 9ba6e5e714..8072d436d2 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -19,12 +19,12 @@
  */
 
 #include "libavutil/avutil.h"
-#include "libavutil/pixfmt.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixfmt.h"
 #include "swscale.h"
 #include "swscale_internal.h"
 
-static const char * sws_context_to_name(void * ptr)
+static const char *sws_context_to_name(void *ptr) /* ptr is not used; the returned name is a constant string */
 {
     return "swscaler";
 }
@@ -34,34 +34,34 @@ static const char * sws_context_to_name(void * ptr)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[] = {
-    { "sws_flags", "scaler/cpu flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.dbl = DEFAULT }, 0, UINT_MAX, VE, "sws_flags" },
-    { "fast_bilinear", "fast bilinear", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "experimental", "experimental", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_X }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "neighbor", "nearest neighbor", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_POINT }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "area", "averaging area", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_AREA }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bicublin", "luma bicubic, chroma bilinear", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_BICUBLIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "gauss", "gaussian", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_GAUSS }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "sinc", "sinc", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_SINC }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_LANCZOS }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "spline", "natural bicubic spline", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "print_info", "print info", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "accurate_rnd", "accurate rounding", 0, AV_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_int", "full chroma interpolation", 0 , AV_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "full_chroma_inp", "full chroma input", 0 , AV_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
-    { "bitexact", "", 0 , AV_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
+    { "sws_flags",       "scaler/cpu flags",              OFFSET(flags),     AV_OPT_TYPE_FLAGS,  { .dbl = DEFAULT            }, 0,       UINT_MAX,       VE, "sws_flags" }, /* flag field; the CONST rows below carry the same "sws_flags" tag */
+    { "fast_bilinear",   "fast bilinear",                 0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_FAST_BILINEAR  }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "bilinear",        "bilinear",                      0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_BILINEAR       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "bicubic",         "bicubic",                       0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_BICUBIC        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "experimental",    "experimental",                  0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_X              }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "neighbor",        "nearest neighbor",              0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_POINT          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "area",            "averaging area",                0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_AREA           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "bicublin",        "luma bicubic, chroma bilinear", 0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_BICUBLIN       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "gauss",           "gaussian",                      0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_GAUSS          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "sinc",            "sinc",                          0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_SINC           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "lanczos",         "lanczos",                       0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_LANCZOS        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "spline",          "natural bicubic spline",        0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_SPLINE         }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "print_info",      "print info",                    0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_PRINT_INFO     }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "accurate_rnd",    "accurate rounding",             0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_ACCURATE_RND   }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "full_chroma_int", "full chroma interpolation",     0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "full_chroma_inp", "full chroma input",             0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "bitexact",        "",                              0,                 AV_OPT_TYPE_CONST,  { .dbl = SWS_BITEXACT       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
 
-    { "srcw", "source width"      , OFFSET(srcW), AV_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
-    { "srch", "source height"     , OFFSET(srcH), AV_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
-    { "dstw", "destination width" , OFFSET(dstW), AV_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
-    { "dsth", "destination height", OFFSET(dstH), AV_OPT_TYPE_INT, {.dbl = 16 }, 1, INT_MAX, VE },
-    { "src_format", "source format"     , OFFSET(srcFormat), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, PIX_FMT_NB-1, VE },
-    { "dst_format", "destination format", OFFSET(dstFormat), AV_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, PIX_FMT_NB-1, VE },
-    { "src_range" , "source range"      , OFFSET(srcRange) , AV_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 1, VE },
-    { "dst_range" , "destination range" , OFFSET(dstRange) , AV_OPT_TYPE_INT, {.dbl = DEFAULT }, 0, 1, VE },
-    { "param0" , "scaler param 0" , OFFSET(param[0]) , AV_OPT_TYPE_DOUBLE, {.dbl = SWS_PARAM_DEFAULT}, INT_MIN, INT_MAX, VE },
-    { "param1" , "scaler param 1" , OFFSET(param[1]) , AV_OPT_TYPE_DOUBLE, {.dbl = SWS_PARAM_DEFAULT}, INT_MIN, INT_MAX, VE },
+    { "srcw",            "source width",                  OFFSET(srcW),      AV_OPT_TYPE_INT,    { .dbl = 16                 }, 1,       INT_MAX,        VE }, /* the four size rows share the value 16 and the bounds 1 .. INT_MAX */
+    { "srch",            "source height",                 OFFSET(srcH),      AV_OPT_TYPE_INT,    { .dbl = 16                 }, 1,       INT_MAX,        VE },
+    { "dstw",            "destination width",             OFFSET(dstW),      AV_OPT_TYPE_INT,    { .dbl = 16                 }, 1,       INT_MAX,        VE },
+    { "dsth",            "destination height",            OFFSET(dstH),      AV_OPT_TYPE_INT,    { .dbl = 16                 }, 1,       INT_MAX,        VE },
+    { "src_format",      "source format",                 OFFSET(srcFormat), AV_OPT_TYPE_INT,    { .dbl = DEFAULT            }, 0,       PIX_FMT_NB - 1, VE },
+    { "dst_format",      "destination format",            OFFSET(dstFormat), AV_OPT_TYPE_INT,    { .dbl = DEFAULT            }, 0,       PIX_FMT_NB - 1, VE },
+    { "src_range",       "source range",                  OFFSET(srcRange),  AV_OPT_TYPE_INT,    { .dbl = DEFAULT            }, 0,       1,              VE },
+    { "dst_range",       "destination range",             OFFSET(dstRange),  AV_OPT_TYPE_INT,    { .dbl = DEFAULT            }, 0,       1,              VE },
+    { "param0",          "scaler param 0",                OFFSET(param[0]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
+    { "param1",          "scaler param 1",                OFFSET(param[1]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
 
     { NULL }
 };
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 2a9133d498..14b595f107 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -22,48 +22,58 @@
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
 #include <inttypes.h>
-#include "config.h"
+
 #include "libavutil/bswap.h"
+#include "config.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
 
-void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
 void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
-void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
 void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
 void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size);
 
-void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+void (*rgb32to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb32to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb24to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to32)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb16to15)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to16)(const uint8_t *src, uint8_t *dst, int src_size);
+void (*rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size);
+
+void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+
+void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
+                   const uint8_t *vsrc, uint8_t *dst,
                    int width, int height,
                    int lumStride, int chromStride, int dstStride);
-void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc,
+                   const uint8_t *vsrc, uint8_t *dst,
                    int width, int height,
                    int lumStride, int chromStride, int dstStride);
-void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc,
+                      const uint8_t *vsrc, uint8_t *dst,
                       int width, int height,
                       int lumStride, int chromStride, int dstStride);
-void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
+void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc,
+                      const uint8_t *vsrc, uint8_t *dst,
                       int width, int height,
                       int lumStride, int chromStride, int dstStride);
-void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst,
+                   uint8_t *udst, uint8_t *vdst,
                    int width, int height,
                    int lumStride, int chromStride, int srcStride);
-void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
+                    uint8_t *udst, uint8_t *vdst,
                     int width, int height,
                     int lumStride, int chromStride, int srcStride);
 void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
@@ -76,45 +86,44 @@ void (*vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                     int width, int height,
                     int srcStride1, int srcStride2,
                     int dstStride1, int dstStride2);
-void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
-                     uint8_t *dst,
+void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2,
+                     const uint8_t *src3, uint8_t *dst,
                      int width, int height,
                      int srcStride1, int srcStride2,
                      int srcStride3, int dstStride);
-void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     int width, int height,
+void (*uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                     const uint8_t *src, int width, int height,
                      int lumStride, int chromStride, int srcStride);
-void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     int width, int height,
+void (*uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                     const uint8_t *src, int width, int height,
                      int lumStride, int chromStride, int srcStride);
-void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     int width, int height,
+void (*yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                     const uint8_t *src, int width, int height,
                      int lumStride, int chromStride, int srcStride);
-void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
-                     int width, int height,
+void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                     const uint8_t *src, int width, int height,
                      int lumStride, int chromStride, int srcStride);
 
 #define RGB2YUV_SHIFT 8
-#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
-#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
-#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
-#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
-#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
-#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
-#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
-#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
+#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GY ((int)( 0.504 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GV ((int)(-0.368 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GU ((int)(-0.291 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RY ((int)( 0.257 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RV ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RU ((int)(-0.148 * (1 << RGB2YUV_SHIFT) + 0.5))
 
 //plain C versions
 #include "rgb2rgb_template.c"
 
-
 /*
- RGB15->RGB16 original by Strepto/Astral
- ported to gcc & bugfixed : A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
+ * RGB15->RGB16 original by Strepto/Astral
+ * ported to gcc & bugfixed: A'rpi
+ * MMX2, 3DNOW optimization by Nick Kurshev
+ * 32-bit C version, and and&add trick by Michael Niedermayer
+ */
 
 void sws_rgb2rgb_init(void)
 {
@@ -127,16 +136,16 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size >> 2;
 
-    for (i=0; i<num_pixels; i++) {
+    for (i = 0; i < num_pixels; i++) {
 #if HAVE_BIGENDIAN
         /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
-        dst[3*i + 0] = src[4*i + 1];
-        dst[3*i + 1] = src[4*i + 2];
-        dst[3*i + 2] = src[4*i + 3];
+        dst[3 * i + 0] = src[4 * i + 1];
+        dst[3 * i + 1] = src[4 * i + 2];
+        dst[3 * i + 2] = src[4 * i + 3];
 #else
-        dst[3*i + 0] = src[4*i + 2];
-        dst[3*i + 1] = src[4*i + 1];
-        dst[3*i + 2] = src[4*i + 0];
+        dst[3 * i + 0] = src[4 * i + 2];
+        dst[3 * i + 1] = src[4 * i + 1];
+        dst[3 * i + 2] = src[4 * i + 0];
 #endif
     }
 }
@@ -144,39 +153,40 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size)
 void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i;
-    for (i=0; 3*i<src_size; i++) {
+
+    for (i = 0; 3 * i < src_size; i++) { /* 3 source bytes -> 4 output bytes per pass; NOTE(review): reads past src_size if it is not a multiple of 3 */
 #if HAVE_BIGENDIAN
-        /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
-        dst[4*i + 0] = 255;
-        dst[4*i + 1] = src[3*i + 0];
-        dst[4*i + 2] = src[3*i + 1];
-        dst[4*i + 3] = src[3*i + 2];
+        /* RGB24 (= R, G, B) -> BGR32 (= A, R, G, B) */
+        dst[4 * i + 0] = 255; /* constant 255 goes first */
+        dst[4 * i + 1] = src[3 * i + 0];
+        dst[4 * i + 2] = src[3 * i + 1];
+        dst[4 * i + 3] = src[3 * i + 2];
 #else
-        dst[4*i + 0] = src[3*i + 2];
-        dst[4*i + 1] = src[3*i + 1];
-        dst[4*i + 2] = src[3*i + 0];
-        dst[4*i + 3] = 255;
+        dst[4 * i + 0] = src[3 * i + 2]; /* the three source bytes are stored in reverse order */
+        dst[4 * i + 1] = src[3 * i + 1];
+        dst[4 * i + 2] = src[3 * i + 0];
+        dst[4 * i + 3] = 255; /* constant 255 goes last */
 #endif
     }
 }
 
 void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
 #if HAVE_BIGENDIAN
         *d++ = 255;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0xF800)>>8;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x7E0)  >> 3;
+        *d++ = (bgr & 0xF800) >> 8;
 #else
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0xF800) >> 8;
+        *d++ = (bgr & 0x7E0)  >> 3;
+        *d++ = (bgr & 0x1F)   << 3;
         *d++ = 255;
 #endif
     }
@@ -184,34 +194,34 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 
 void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint16_t *d = (uint16_t *)dst;
-    const uint16_t *s = (const uint16_t *)src;
     uint16_t rgb, r, g, b;
+    uint16_t *d         = (uint16_t *)dst; /* output cursor, one 16-bit word per pixel */
+    const uint16_t *s   = (const uint16_t *)src; /* input cursor, one 16-bit word per pixel */
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
-        rgb = *s++;
-        r = rgb & 0xF00;
-        g = rgb & 0x0F0;
-        b = rgb & 0x00F;
-        r = (r << 3) | ((r & 0x800) >> 1);
-        g = (g << 2) | ((g & 0x080) >> 2);
-        b = (b << 1) | ( b          >> 3);
+        rgb  = *s++;
+        r    = rgb & 0xF00; /* bits 8-11 */
+        g    = rgb & 0x0F0; /* bits 4-7 */
+        b    = rgb & 0x00F; /* bits 0-3 */
+        r    = (r << 3) | ((r & 0x800) >> 1); /* move to bits 11-14, copy the top bit into bit 10 */
+        g    = (g << 2) | ((g & 0x080) >> 2); /* move to bits 6-9, copy the top bit into bit 5 */
+        b    = (b << 1) | ( b          >> 3); /* move to bits 1-4, copy the top bit into bit 0 */
         *d++ = r | g | b;
     }
 }
 
 void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = dst; /* output cursor, three bytes written per pixel */
+    const uint16_t *s   = (const uint16_t *)src; /* input cursor, one 16-bit word per pixel */
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0xF800) >> 8; /* bits 11-15 -> upper 5 bits of the byte */
+        *d++ = (bgr & 0x7E0)  >> 3; /* bits 5-10 -> upper 6 bits of the byte */
+        *d++ = (bgr & 0x1F)   << 3; /* bits 0-4 -> upper 5 bits of the byte */
     }
 }
 
@@ -219,9 +229,9 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size >> 1;
 
-    for (i=0; i<num_pixels; i++) {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11);
+    for (i = 0; i < num_pixels; i++) {
+        unsigned rgb         = ((const uint16_t *)src)[i];
+        ((uint16_t *)dst)[i] = (rgb >> 11) | (rgb & 0x7E0) | (rgb << 11);
     }
 }
 
@@ -229,29 +239,29 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size >> 1;
 
-    for (i=0; i<num_pixels; i++) {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10);
+    for (i = 0; i < num_pixels; i++) {
+        unsigned rgb         = ((const uint16_t *)src)[i];
+        ((uint16_t *)dst)[i] = (rgb >> 11) | ((rgb & 0x7C0) >> 1) | ((rgb & 0x1F) << 10);
     }
 }
 
 void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
 #if HAVE_BIGENDIAN
         *d++ = 255;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x7C00)>>7;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x3E0)  >> 2;
+        *d++ = (bgr & 0x7C00) >> 7;
 #else
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0x7C00) >> 7;
+        *d++ = (bgr & 0x3E0)  >> 2;
+        *d++ = (bgr & 0x1F)   << 3;
         *d++ = 255;
 #endif
     }
@@ -259,15 +269,15 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
 
 void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = dst; /* output cursor, three bytes written per pixel */
+    const uint16_t *s   = (const uint16_t *)src; /* input cursor, one 16-bit word per pixel */
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0x7C00) >> 7; /* bits 10-14 -> upper 5 bits of the byte */
+        *d++ = (bgr & 0x3E0)  >> 2; /* bits 5-9 -> upper 5 bits of the byte */
+        *d++ = (bgr & 0x1F)   << 3; /* bits 0-4 -> upper 5 bits of the byte */
     }
 }
 
@@ -275,9 +285,9 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size >> 1;
 
-    for (i=0; i<num_pixels; i++) {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        ((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11);
+    for (i = 0; i < num_pixels; i++) {
+        unsigned rgb         = ((const uint16_t *)src)[i];
+        ((uint16_t *)dst)[i] = ((rgb & 0x7C00) >> 10) | ((rgb & 0x3E0) << 1) | (rgb << 11);
     }
 }
 
@@ -285,22 +295,22 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size >> 1;
 
-    for (i=0; i<num_pixels; i++) {
-        unsigned rgb = ((const uint16_t*)src)[i];
-        unsigned br  = rgb & 0x7C1F;
-        ((uint16_t*)dst)[i] = (br>>10) | (rgb&0x3E0) | (br<<10);
+    for (i = 0; i < num_pixels; i++) {
+        unsigned rgb         = ((const uint16_t *)src)[i];
+        unsigned br          = rgb & 0x7C1F;
+        ((uint16_t *)dst)[i] = (br >> 10) | (rgb & 0x3E0) | (br << 10);
     }
 }
 
 void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint16_t *d = (uint16_t*)dst;
-    uint16_t *s = (uint16_t*)src;
+    uint16_t *d = (uint16_t *)dst;
+    uint16_t *s = (uint16_t *)src; /* NOTE(review): this cast drops the const qualifier of src */
     int i, num_pixels = src_size >> 1;
 
     for (i = 0; i < num_pixels; i++) {
         unsigned rgb = s[i];
-        d[i] = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF;
+        d[i]         = (rgb << 8 | rgb & 0xF0 | rgb >> 8) & 0xFFF; /* swaps bits 0-3 with bits 8-11 and keeps bits 4-7; input bits 12-15, if set, are also OR-ed into bits 4-7 */
     }
 }
 
@@ -308,21 +318,22 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
 {
     int i, num_pixels = src_size;
 
-    for (i=0; i<num_pixels; i++) {
+    for (i = 0; i < num_pixels; i++) {
         register uint8_t rgb = src[i];
         unsigned r           = (rgb & 0x07);
         unsigned g           = (rgb & 0x38) >> 3;
         unsigned b           = (rgb & 0xC0) >> 6;
-        dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6);
+        dst[i]               = ((b << 1) & 0x07) | ((g & 0x07) << 3) | ((r & 0x03) << 6);
     }
 }
 
 #define DEFINE_SHUFFLE_BYTES(a, b, c, d)                                \
-void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \
+void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src,             \
+                                        uint8_t *dst, int src_size)     \
 {                                                                       \
-    int i;                                                             \
+    int i;                                                              \
                                                                         \
-    for (i = 0; i < src_size; i+=4) {                                   \
+    for (i = 0; i < src_size; i += 4) {                                 \
         dst[i + 0] = src[i + a];                                        \
         dst[i + 1] = src[i + b];                                        \
         dst[i + 2] = src[i + c];                                        \
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 2eca93d395..d1a43e01cb 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -26,266 +26,286 @@
 
 #include <stddef.h>
 
-static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size) /* src_size is used as a byte count: end = src + src_size */
 {
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
+    uint8_t *dest      = dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
 #if HAVE_BIGENDIAN
-        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
-        *dest++ = 255;
-        *dest++ = s[2];
-        *dest++ = s[1];
-        *dest++ = s[0];
-        s+=3;
+        /* RGB24 (= R, G, B) -> RGB32 (= A, B, G, R) */
+        *dest++  = 255; /* constant 255 is written first */
+        *dest++  = s[2];
+        *dest++  = s[1];
+        *dest++  = s[0];
+        s       += 3; /* the reads above were indexed, so step past the pixel here */
 #else
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = 255;
+        *dest++  = *s++; /* three source bytes are copied in order */
+        *dest++  = *s++;
+        *dest++  = *s++;
+        *dest++  = 255; /* constant 255 is written last */
 #endif
     }
 }
 
-static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size) /* src_size is used as a byte count: end = src + src_size */
 {
-    uint8_t *dest = dst;
-    const uint8_t *s = src;
+    uint8_t *dest      = dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
 #if HAVE_BIGENDIAN
-        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
+        /* RGB32 (= A, B, G, R) -> RGB24 (= R, G, B) */
         s++;
-        dest[2] = *s++;
-        dest[1] = *s++;
-        dest[0] = *s++;
-        dest += 3;
+        dest[2]  = *s++; /* after the skipped first byte, the next three are stored in reverse order */
+        dest[1]  = *s++;
+        dest[0]  = *s++;
+        dest    += 3; /* the writes above were indexed, so step past the pixel here */
 #else
-        *dest++ = *s++;
-        *dest++ = *s++;
-        *dest++ = *s++;
+        *dest++  = *s++; /* first three source bytes are copied in order; the fourth is skipped below */
+        *dest++  = *s++;
+        *dest++  = *s++;
         s++;
 #endif
     }
 }
 
 /*
- original by Strepto/Astral
- ported to gcc & bugfixed: A'rpi
- MMX2, 3DNOW optimization by Nick Kurshev
- 32-bit C version, and and&add trick by Michael Niedermayer
-*/
+ * original by Strepto/Astral
+ * ported to gcc & bugfixed: A'rpi
+ * MMX2, 3DNOW optimization by Nick Kurshev
+ * 32-bit C version, and and&add trick by Michael Niedermayer
+ */
 static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
+    register uint8_t *d         = dst;
+    register const uint8_t *s   = src;
     register const uint8_t *end = s + src_size;
     const uint8_t *mm_end       = end - 3;
 
     while (s < mm_end) {
-        register unsigned x= *((const uint32_t *)s);
-        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
-        d+=4;
-        s+=4;
+        register unsigned x = *((const uint32_t *)s); /* one 32-bit load covers two 16-bit values */
+        *((uint32_t *)d)    = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0); /* adding bits 5-14 to themselves shifts them up one place; bits 0-4 stay, in each 16-bit half */
+        d += 4;
+        s += 4;
     }
     if (s < end) {
-        register unsigned short x= *((const uint16_t *)s);
-        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
+        register unsigned short x = *((const uint16_t *)s); /* remaining 16-bit word after the 32-bit loop */
+        *((uint16_t *)d)          = (x & 0x7FFF) + (x & 0x7FE0); /* same add trick on a single 16-bit value */
     }
 }
 
 static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    register const uint8_t* s=src;
-    register uint8_t* d=dst;
+    register uint8_t *d         = dst;
+    register const uint8_t *s   = src;
     register const uint8_t *end = s + src_size;
     const uint8_t *mm_end       = end - 3;
 
     while (s < mm_end) {
-        register uint32_t x= *((const uint32_t*)s);
-        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
-        s+=4;
-        d+=4;
+        register uint32_t x  = *((const uint32_t *)s); /* one 32-bit load covers two 16-bit values */
+        *((uint32_t *)d)     = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F); /* bits 6-15 move down one place, bit 5 is dropped, bits 0-4 are kept, in each 16-bit half */
+        s                   += 4;
+        d                   += 4;
     }
     if (s < end) {
-        register uint16_t x= *((const uint16_t*)s);
-        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
+        register uint16_t x = *((const uint16_t *)s); /* remaining 16-bit word after the 32-bit loop */
+        *((uint16_t *)d)    = ((x >> 1) & 0x7FE0) | (x & 0x001F); /* same repacking on a single 16-bit value */
     }
 }
 
 static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst; /* output cursor, one 16-bit word per pixel */
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
+        register int rgb  = *(const uint32_t *)s; /* whole 4-byte pixel read as one 32-bit value */
+        s                += 4;
+        *d++              = ((rgb & 0xFF)     >> 3) + /* top 5 bits of bits 0-7 -> bits 0-4 */
+                            ((rgb & 0xFC00)   >> 5) + /* bits 10-15 -> bits 5-10 */
+                            ((rgb & 0xF80000) >> 8); /* bits 19-23 -> bits 11-15 */
     }
 }
 
-static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size) /* src_size is used as a byte count: end = src + src_size */
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst; /* output cursor, one 16-bit word per pixel */
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
+        register int rgb  = *(const uint32_t *)s; /* whole 4-byte pixel read as one 32-bit value */
+        s                += 4;
+        *d++              = ((rgb & 0xF8)     << 8) + /* bits 3-7 -> bits 11-15 */
+                            ((rgb & 0xFC00)   >> 5) + /* bits 10-15 -> bits 5-10 */
+                            ((rgb & 0xF80000) >> 19); /* bits 19-23 -> bits 0-4 */
     }
 }
 
 static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
+        register int rgb  = *(const uint32_t *)s;
+        s                += 4;
+        *d++              = ((rgb & 0xFF)     >> 3) +
+                            ((rgb & 0xF800)   >> 6) +
+                            ((rgb & 0xF80000) >> 9);
     }
 }
 
-static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
-        register int rgb = *(const uint32_t*)s; s += 4;
-        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
+        register int rgb  = *(const uint32_t *)s;
+        s                += 4;
+        *d++              = ((rgb & 0xF8)     <<  7) +
+                            ((rgb & 0xF800)   >>  6) +
+                            ((rgb & 0xF80000) >> 19);
     }
 }
 
-static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
         const int b = *s++;
         const int g = *s++;
         const int r = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+        *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
     }
 }
 
 static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
         const int r = *s++;
         const int g = *s++;
         const int b = *s++;
-        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
+        *d++        = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
     }
 }
 
-static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
         const int b = *s++;
         const int g = *s++;
         const int r = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+        *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
     }
 }
 
 static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    const uint8_t *s = src;
-    uint16_t *d = (uint16_t *)dst;
+    uint16_t *d        = (uint16_t *)dst;
+    const uint8_t *s   = src;
     const uint8_t *end = s + src_size;
 
     while (s < end) {
         const int r = *s++;
         const int g = *s++;
         const int b = *s++;
-        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
+        *d++        = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
     }
 }
 
 /*
-  I use less accurate approximation here by simply left-shifting the input
-  value and filling the low order bits with zeroes. This method improves PNG
-  compression but this scheme cannot reproduce white exactly, since it does
-  not generate an all-ones maximum value; the net effect is to darken the
-  image slightly.
-
-  The better method should be "left bit replication":
-
-   4 3 2 1 0
-   ---------
-   1 1 0 1 1
-
-   7 6 5 4 3  2 1 0
-   ----------------
-   1 1 0 1 1  1 1 0
-   |=======|  |===|
-       |      leftmost bits repeated to fill open bits
-       |
-   original bits
-*/
-static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
+ * I use less accurate approximation here by simply left-shifting the input
+ * value and filling the low order bits with zeroes. This method improves PNG
+ * compression but this scheme cannot reproduce white exactly, since it does
+ * not generate an all-ones maximum value; the net effect is to darken the
+ * image slightly.
+ *
+ * The better method should be "left bit replication":
+ *
+ *  4 3 2 1 0
+ *  ---------
+ *  1 1 0 1 1
+ *
+ *  7 6 5 4 3  2 1 0
+ *  ----------------
+ *  1 1 0 1 1  1 1 0
+ *  |=======|  |===|
+ *      |      leftmost bits repeated to fill open bits
+ *      |
+ *  original bits
+ */
+static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
+    uint8_t *d          = dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x7C00)>>7;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x3E0)  >> 2;
+        *d++ = (bgr & 0x7C00) >> 7;
     }
 }
 
-static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
+                                  int src_size)
 {
-    uint8_t *d = (uint8_t *)dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = (uint8_t *)dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0xF800)>>8;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x7E0)  >> 3;
+        *d++ = (bgr & 0xF800) >> 8;
     }
 }
 
 static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t *)src;
+    uint8_t *d          = dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
 #if HAVE_BIGENDIAN
         *d++ = 255;
-        *d++ = (bgr&0x7C00)>>7;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0x7C00) >> 7;
+        *d++ = (bgr & 0x3E0)  >> 2;
+        *d++ = (bgr & 0x1F)   << 3;
 #else
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x3E0)>>2;
-        *d++ = (bgr&0x7C00)>>7;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x3E0)  >> 2;
+        *d++ = (bgr & 0x7C00) >> 7;
         *d++ = 255;
 #endif
     }
@@ -293,46 +313,49 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 
 static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
-    uint8_t *d = dst;
-    const uint16_t *s = (const uint16_t*)src;
+    uint8_t *d          = dst;
+    const uint16_t *s   = (const uint16_t *)src;
     const uint16_t *end = s + src_size / 2;
 
     while (s < end) {
         register uint16_t bgr = *s++;
 #if HAVE_BIGENDIAN
         *d++ = 255;
-        *d++ = (bgr&0xF800)>>8;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0x1F)<<3;
+        *d++ = (bgr & 0xF800) >> 8;
+        *d++ = (bgr & 0x7E0)  >> 3;
+        *d++ = (bgr & 0x1F)   << 3;
 #else
-        *d++ = (bgr&0x1F)<<3;
-        *d++ = (bgr&0x7E0)>>3;
-        *d++ = (bgr&0xF800)>>8;
+        *d++ = (bgr & 0x1F)   << 3;
+        *d++ = (bgr & 0x7E0)  >> 3;
+        *d++ = (bgr & 0xF800) >> 8;
         *d++ = 255;
 #endif
     }
 }
 
-static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
+static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst,
+                                        int src_size)
 {
-    int idx = 15 - src_size;
-    const uint8_t *s = src-idx;
-    uint8_t *d = dst-idx;
-    for (; idx<15; idx+=4) {
-        register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
-        v &= 0xff00ff;
-        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
+    int idx          = 15  - src_size;
+    const uint8_t *s = src - idx;
+    uint8_t *d       = dst - idx;
+
+    for (; idx < 15; idx += 4) {
+        register int v        = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
+        v                    &= 0xff00ff;
+        *(uint32_t *)&d[idx]  = (v >> 16) + g + (v << 16);
     }
 }
 
 static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
 {
     unsigned i;
-    for (i=0; i<src_size; i+=3) {
+
+    for (i = 0; i < src_size; i += 3) {
         register uint8_t x = src[i + 2];
-        dst[i + 1] = src[i + 1];
-        dst[i + 2] = src[i + 0];
-        dst[i + 0] = x;
+        dst[i + 1]         = src[i + 1];
+        dst[i + 2]         = src[i + 0];
+        dst[i + 0]         = x;
     }
 }
 
@@ -344,9 +367,10 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
 {
     int y, i;
     const int chromWidth = width >> 1;
-    for (y=0; y<height; y++) {
+
+    for (y = 0; y < height; y++) {
 #if HAVE_FAST_64BIT
-        uint64_t *ldst = (uint64_t *) dst;
+        uint64_t *ldst = (uint64_t *)dst;
         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
         for (i = 0; i < chromWidth; i += 2) {
             uint64_t k = yc[0] + (uc[0] << 8) +
@@ -354,28 +378,29 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
             uint64_t l = yc[2] + (uc[1] << 8) +
                          (yc[3] << 16) + (vc[1] << 24);
             *ldst++ = k + (l << 32);
-            yc += 4;
-            uc += 2;
-            vc += 2;
+            yc     += 4;
+            uc     += 2;
+            vc     += 2;
         }
 
 #else
-        int *idst = (int32_t *) dst;
+        int *idst = (int32_t *)dst;
         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+
         for (i = 0; i < chromWidth; i++) {
 #if HAVE_BIGENDIAN
-            *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
-                (yc[1] << 8) + (vc[0] << 0);
+            *idst++ = (yc[0] << 24) + (uc[0] << 16) +
+                      (yc[1] <<  8) + (vc[0] <<  0);
 #else
             *idst++ = yc[0] + (uc[0] << 8) +
-                (yc[1] << 16) + (vc[0] << 24);
+                      (yc[1] << 16) + (vc[0] << 24);
 #endif
             yc += 2;
             uc++;
             vc++;
         }
 #endif
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
             usrc += chromStride;
             vsrc += chromStride;
         }
@@ -390,9 +415,8 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                int width, int height,
-                                int lumStride, int chromStride,
-                                int dstStride)
+                                int width, int height, int lumStride,
+                                int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -407,9 +431,10 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
 {
     int y, i;
     const int chromWidth = width >> 1;
-    for (y=0; y<height; y++) {
+
+    for (y = 0; y < height; y++) {
 #if HAVE_FAST_64BIT
-        uint64_t *ldst = (uint64_t *) dst;
+        uint64_t *ldst = (uint64_t *)dst;
         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
         for (i = 0; i < chromWidth; i += 2) {
             uint64_t k = uc[0] + (yc[0] << 8) +
@@ -417,33 +442,34 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
             uint64_t l = uc[1] + (yc[2] << 8) +
                          (vc[1] << 16) + (yc[3] << 24);
             *ldst++ = k + (l << 32);
-            yc += 4;
-            uc += 2;
-            vc += 2;
+            yc     += 4;
+            uc     += 2;
+            vc     += 2;
         }
 
 #else
-        int *idst = (int32_t *) dst;
+        int *idst = (int32_t *)dst;
         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
+
         for (i = 0; i < chromWidth; i++) {
 #if HAVE_BIGENDIAN
-            *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
-                (vc[0] << 8) + (yc[1] << 0);
+            *idst++ = (uc[0] << 24) + (yc[0] << 16) +
+                      (vc[0] <<  8) + (yc[1] <<  0);
 #else
             *idst++ = uc[0] + (yc[0] << 8) +
-               (vc[0] << 16) + (yc[1] << 24);
+                      (vc[0] << 16) + (yc[1] << 24);
 #endif
             yc += 2;
             uc++;
             vc++;
         }
 #endif
-        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
+        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
             usrc += chromStride;
             vsrc += chromStride;
         }
         ysrc += lumStride;
-        dst += dstStride;
+        dst  += dstStride;
     }
 }
 
@@ -453,9 +479,8 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                 const uint8_t *vsrc, uint8_t *dst,
-                                int width, int height,
-                                int lumStride, int chromStride,
-                                int dstStride)
+                                int width, int height, int lumStride,
+                                int chromStride, int dstStride)
 {
     //FIXME interpolate chroma
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
@@ -467,9 +492,8 @@ static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   int width, int height,
-                                   int lumStride, int chromStride,
-                                   int dstStride)
+                                   int width, int height, int lumStride,
+                                   int chromStride, int dstStride)
 {
     yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -480,9 +504,8 @@ static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                    const uint8_t *vsrc, uint8_t *dst,
-                                   int width, int height,
-                                   int lumStride, int chromStride,
-                                   int dstStride)
+                                   int width, int height, int lumStride,
+                                   int chromStride, int dstStride)
 {
     yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst, width, height, lumStride,
                       chromStride, dstStride, 1);
@@ -494,26 +517,26 @@ static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
  */
 static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                int width, int height,
-                                int lumStride, int chromStride,
-                                int srcStride)
+                                int width, int height, int lumStride,
+                                int chromStride, int srcStride)
 {
     int y;
     const int chromWidth = width >> 1;
-    for (y=0; y<height; y+=2) {
+
+    for (y = 0; y < height; y += 2) {
         int i;
-        for (i=0; i<chromWidth; i++) {
-            ydst[2*i+0]     = src[4*i+0];
-            udst[i]     = src[4*i+1];
-            ydst[2*i+1]     = src[4*i+2];
-            vdst[i]     = src[4*i+3];
+        for (i = 0; i < chromWidth; i++) {
+            ydst[2 * i + 0] = src[4 * i + 0];
+            udst[i]         = src[4 * i + 1];
+            ydst[2 * i + 1] = src[4 * i + 2];
+            vdst[i]         = src[4 * i + 3];
         }
         ydst += lumStride;
         src  += srcStride;
 
-        for (i=0; i<chromWidth; i++) {
-            ydst[2*i+0]     = src[4*i+0];
-            ydst[2*i+1]     = src[4*i+2];
+        for (i = 0; i < chromWidth; i++) {
+            ydst[2 * i + 0] = src[4 * i + 0];
+            ydst[2 * i + 1] = src[4 * i + 2];
         }
         udst += chromStride;
         vdst += chromStride;
@@ -525,46 +548,46 @@ static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
 static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                               int srcHeight, int srcStride, int dstStride)
 {
-    int x,y;
+    int x, y;
 
-    dst[0]= src[0];
+    dst[0] = src[0];
 
     // first line
-    for (x=0; x<srcWidth-1; x++) {
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    for (x = 0; x < srcWidth - 1; x++) {
+        dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
+        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
     }
-    dst[2*srcWidth-1]= src[srcWidth-1];
+    dst[2 * srcWidth - 1] = src[srcWidth - 1];
 
-    dst+= dstStride;
+    dst += dstStride;
 
-    for (y=1; y<srcHeight; y++) {
+    for (y = 1; y < srcHeight; y++) {
         const int mmxSize = 1;
 
-        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
-        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
+        dst[0]         = (src[0] * 3 + src[srcStride]) >> 2;
+        dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
 
-        for (x=mmxSize-1; x<srcWidth-1; x++) {
-            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
-            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
-            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
+        for (x = mmxSize - 1; x < srcWidth - 1; x++) {
+            dst[2 * x + 1]             = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
+            dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
+            dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride])     >> 2;
+            dst[2 * x + 2]             = (src[x + 1] * 3 + src[x + srcStride])     >> 2;
         }
-        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
-        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
+        dst[srcWidth * 2 - 1]             = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
+        dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
 
-        dst+=dstStride*2;
-        src+=srcStride;
+        dst += dstStride * 2;
+        src += srcStride;
     }
 
     // last line
-    dst[0]= src[0];
+    dst[0] = src[0];
 
-    for (x=0; x<srcWidth-1; x++) {
-        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
-        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+    for (x = 0; x < srcWidth - 1; x++) {
+        dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
+        dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
     }
-    dst[2*srcWidth-1]= src[srcWidth-1];
+    dst[2 * srcWidth - 1] = src[srcWidth - 1];
 }
 
 /**
@@ -575,26 +598,26 @@ static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
  */
 static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                 uint8_t *udst, uint8_t *vdst,
-                                int width, int height,
-                                int lumStride, int chromStride,
-                                int srcStride)
+                                int width, int height, int lumStride,
+                                int chromStride, int srcStride)
 {
     int y;
     const int chromWidth = width >> 1;
-    for (y=0; y<height; y+=2) {
+
+    for (y = 0; y < height; y += 2) {
         int i;
-        for (i=0; i<chromWidth; i++) {
-            udst[i]     = src[4*i+0];
-            ydst[2*i+0] = src[4*i+1];
-            vdst[i]     = src[4*i+2];
-            ydst[2*i+1] = src[4*i+3];
+        for (i = 0; i < chromWidth; i++) {
+            udst[i]         = src[4 * i + 0];
+            ydst[2 * i + 0] = src[4 * i + 1];
+            vdst[i]         = src[4 * i + 2];
+            ydst[2 * i + 1] = src[4 * i + 3];
         }
         ydst += lumStride;
         src  += srcStride;
 
-        for (i=0; i<chromWidth; i++) {
-            ydst[2*i+0] = src[4*i+1];
-            ydst[2*i+1] = src[4*i+3];
+        for (i = 0; i < chromWidth; i++) {
+            ydst[2 * i + 0] = src[4 * i + 1];
+            ydst[2 * i + 1] = src[4 * i + 3];
         }
         udst += chromStride;
         vdst += chromStride;
@@ -619,44 +642,44 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 
     for (y = 0; y < height; y += 2) {
         int i;
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
+        for (i = 0; i < chromWidth; i++) {
+            unsigned int b = src[6 * i + 0];
+            unsigned int g = src[6 * i + 1];
+            unsigned int r = src[6 * i + 2];
 
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
-            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
+            unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) +  16;
+            unsigned int V = ((RV * r + GV * g + BV * b) >> RGB2YUV_SHIFT) + 128;
+            unsigned int U = ((RU * r + GU * g + BU * b) >> RGB2YUV_SHIFT) + 128;
 
             udst[i]     = U;
             vdst[i]     = V;
-            ydst[2*i]   = Y;
+            ydst[2 * i] = Y;
 
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
+            b = src[6 * i + 3];
+            g = src[6 * i + 4];
+            r = src[6 * i + 5];
 
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
+            Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
+            ydst[2 * i + 1] = Y;
         }
         ydst += lumStride;
         src  += srcStride;
 
-        for (i=0; i<chromWidth; i++) {
-            unsigned int b = src[6*i+0];
-            unsigned int g = src[6*i+1];
-            unsigned int r = src[6*i+2];
+        for (i = 0; i < chromWidth; i++) {
+            unsigned int b = src[6 * i + 0];
+            unsigned int g = src[6 * i + 1];
+            unsigned int r = src[6 * i + 2];
 
-            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+            unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
 
-            ydst[2*i]     = Y;
+            ydst[2 * i] = Y;
 
-            b = src[6*i+3];
-            g = src[6*i+4];
-            r = src[6*i+5];
+            b = src[6 * i + 3];
+            g = src[6 * i + 4];
+            r = src[6 * i + 5];
 
-            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
-            ydst[2*i+1]     = Y;
+            Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
+            ydst[2 * i + 1] = Y;
         }
         udst += chromStride;
         vdst += chromStride;
@@ -666,17 +689,16 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
 }
 
 static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                              uint8_t *dest, int width,
-                              int height, int src1Stride,
-                              int src2Stride, int dstStride)
+                              uint8_t *dest, int width, int height,
+                              int src1Stride, int src2Stride, int dstStride)
 {
     int h;
 
-    for (h=0; h < height; h++) {
+    for (h = 0; h < height; h++) {
         int w;
-        for (w=0; w < width; w++) {
-            dest[2*w+0] = src1[w];
-            dest[2*w+1] = src2[w];
+        for (w = 0; w < width; w++) {
+            dest[2 * w + 0] = src1[w];
+            dest[2 * w + 1] = src2[w];
         }
         dest += dstStride;
         src1 += src1Stride;
@@ -694,15 +716,15 @@ static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
     int w = width  / 2;
     int h = height / 2;
 
-    for (y=0;y<h;y++) {
-        const uint8_t* s1=src1+srcStride1*(y>>1);
-        uint8_t* d=dst1+dstStride1*y;
+    for (y = 0; y < h; y++) {
+        const uint8_t *s1 = src1 + srcStride1 * (y >> 1);
+        uint8_t *d        = dst1 + dstStride1 *  y;
         for (x = 0; x < w; x++)
             d[2 * x] = d[2 * x + 1] = s1[x];
     }
-    for (y=0;y<h;y++) {
-        const uint8_t* s2=src2+srcStride2*(y>>1);
-        uint8_t* d=dst2+dstStride2*y;
+    for (y = 0; y < h; y++) {
+        const uint8_t *s2 = src2 + srcStride2 * (y >> 1);
+        uint8_t *d        = dst2 + dstStride2 *  y;
         for (x = 0; x < w; x++)
             d[2 * x] = d[2 * x + 1] = s2[x];
     }
@@ -718,33 +740,32 @@ static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
     int w = width / 2;
     int h = height;
 
-    for (y=0;y<h;y++) {
-        const uint8_t* yp=src1+srcStride1*y;
-        const uint8_t* up=src2+srcStride2*(y>>2);
-        const uint8_t* vp=src3+srcStride3*(y>>2);
-        uint8_t* d=dst+dstStride*y;
+    for (y = 0; y < h; y++) {
+        const uint8_t *yp = src1 + srcStride1 *  y;
+        const uint8_t *up = src2 + srcStride2 * (y >> 2);
+        const uint8_t *vp = src3 + srcStride3 * (y >> 2);
+        uint8_t *d        = dst  + dstStride  *  y;
         for (x = 0; x < w; x++) {
-            const int x2 = x<<2;
-            d[8*x+0] = yp[x2];
-            d[8*x+1] = up[x];
-            d[8*x+2] = yp[x2+1];
-            d[8*x+3] = vp[x];
-            d[8*x+4] = yp[x2+2];
-            d[8*x+5] = up[x];
-            d[8*x+6] = yp[x2+3];
-            d[8*x+7] = vp[x];
+            const int x2 = x << 2;
+            d[8 * x + 0] = yp[x2];
+            d[8 * x + 1] = up[x];
+            d[8 * x + 2] = yp[x2 + 1];
+            d[8 * x + 3] = vp[x];
+            d[8 * x + 4] = yp[x2 + 2];
+            d[8 * x + 5] = up[x];
+            d[8 * x + 6] = yp[x2 + 3];
+            d[8 * x + 7] = vp[x];
         }
     }
 }
 
 static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
 {
-    dst +=   count;
-    src += 2*count;
-    count= - count;
-
-    while(count<0) {
-        dst[count]= src[2*count];
+    dst   +=  count;
+    src   +=  count * 2;
+    count  = -count;
+    while (count < 0) {
+        dst[count] = src[2 * count];
         count++;
     }
 }
@@ -752,13 +773,13 @@ static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
 static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                             int count)
 {
-    dst0+=   count;
-    dst1+=   count;
-    src += 4*count;
-    count= - count;
-    while(count<0) {
-        dst0[count]= src[4*count+0];
-        dst1[count]= src[4*count+2];
+    dst0  +=  count;
+    dst1  +=  count;
+    src   +=  count * 4;
+    count  = -count;
+    while (count < 0) {
+        dst0[count] = src[4 * count + 0];
+        dst1[count] = src[4 * count + 2];
         count++;
     }
 }
@@ -766,14 +787,14 @@ static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                                uint8_t *dst0, uint8_t *dst1, int count)
 {
-    dst0 +=   count;
-    dst1 +=   count;
-    src0 += 4*count;
-    src1 += 4*count;
-    count= - count;
-    while(count<0) {
-        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
-        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
+    dst0  +=  count;
+    dst1  +=  count;
+    src0  +=  count * 4;
+    src1  +=  count * 4;
+    count  = -count;
+    while (count < 0) {
+        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
+        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
         count++;
     }
 }
@@ -781,14 +802,14 @@ static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
 static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
 {
-    dst0+=   count;
-    dst1+=   count;
-    src += 4*count;
-    count= - count;
+    dst0  +=  count;
+    dst1  +=  count;
+    src   +=  count * 4;
+    count  = -count;
     src++;
-    while(count<0) {
-        dst0[count]= src[4*count+0];
-        dst1[count]= src[4*count+2];
+    while (count < 0) {
+        dst0[count] = src[4 * count + 0];
+        dst1[count] = src[4 * count + 2];
         count++;
     }
 }
@@ -796,16 +817,16 @@ static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
 static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
 {
-    dst0 +=   count;
-    dst1 +=   count;
-    src0 += 4*count;
-    src1 += 4*count;
-    count= - count;
+    dst0  +=  count;
+    dst1  +=  count;
+    src0  +=  count * 4;
+    src1  +=  count * 4;
+    count  = -count;
     src0++;
     src1++;
-    while(count<0) {
-        dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
-        dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
+    while (count < 0) {
+        dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
+        dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
         count++;
     }
 }
@@ -815,18 +836,18 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = -((-width) >> 1);
 
-    for (y=0; y<height; y++) {
+    for (y = 0; y < height; y++) {
         extract_even_c(src, ydst, width);
-        if(y&1) {
+        if (y & 1) {
             extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
-            udst+= chromStride;
-            vdst+= chromStride;
+            udst += chromStride;
+            vdst += chromStride;
         }
 
-        src += srcStride;
-        ydst+= lumStride;
+        src  += srcStride;
+        ydst += lumStride;
     }
 }
 
@@ -835,16 +856,16 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = -((-width) >> 1);
 
-    for (y=0; y<height; y++) {
+    for (y = 0; y < height; y++) {
         extract_even_c(src, ydst, width);
         extract_odd2_c(src, udst, vdst, chromWidth);
 
-        src += srcStride;
-        ydst+= lumStride;
-        udst+= chromStride;
-        vdst+= chromStride;
+        src  += srcStride;
+        ydst += lumStride;
+        udst += chromStride;
+        vdst += chromStride;
     }
 }
 
@@ -853,18 +874,18 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = -((-width) >> 1);
 
-    for (y=0; y<height; y++) {
+    for (y = 0; y < height; y++) {
         extract_even_c(src + 1, ydst, width);
-        if(y&1) {
+        if (y & 1) {
             extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
-            udst+= chromStride;
-            vdst+= chromStride;
+            udst += chromStride;
+            vdst += chromStride;
         }
 
-        src += srcStride;
-        ydst+= lumStride;
+        src  += srcStride;
+        ydst += lumStride;
     }
 }
 
@@ -873,16 +894,16 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth= -((-width)>>1);
+    const int chromWidth = -((-width) >> 1);
 
-    for (y=0; y<height; y++) {
+    for (y = 0; y < height; y++) {
         extract_even_c(src + 1, ydst, width);
         extract_even2_c(src, udst, vdst, chromWidth);
 
-        src += srcStride;
-        ydst+= lumStride;
-        udst+= chromStride;
-        vdst+= chromStride;
+        src  += srcStride;
+        ydst += lumStride;
+        udst += chromStride;
+        vdst += chromStride;
     }
 }
 
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 24dc96032d..1c44a2f544 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -28,12 +28,12 @@
 #include <inttypes.h>
 #include <assert.h>
 
+#include "libavutil/bswap.h"
+#include "libavutil/cpu.h"
 #include "config.h"
 #include "rgb2rgb.h"
 #include "swscale.h"
 #include "swscale_internal.h"
-#include "libavutil/cpu.h"
-#include "libavutil/bswap.h"
 
 extern const uint8_t dither_4x4_16[4][8];
 extern const uint8_t dither_8x8_32[8][8];
@@ -41,14 +41,14 @@ extern const uint8_t dither_8x8_73[8][8];
 extern const uint8_t dither_8x8_220[8][8];
 
 const int32_t ff_yuv2rgb_coeffs[8][4] = {
-    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
-    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
-    {104597, 132201, 25675, 53279}, /* unspecified */
-    {104597, 132201, 25675, 53279}, /* reserved */
-    {104448, 132798, 24759, 53109}, /* FCC */
-    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
-    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
-    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
+    { 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */
+    { 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */
+    { 104597, 132201, 25675, 53279 }, /* unspecified */
+    { 104597, 132201, 25675, 53279 }, /* reserved */
+    { 104448, 132798, 24759, 53109 }, /* FCC */
+    { 104597, 132201, 25675, 53279 }, /* ITU-R Rec. 624-4 System B, G */
+    { 104597, 132201, 25675, 53279 }, /* SMPTE 170M */
+    { 117579, 136230, 16907, 35559 }  /* SMPTE 240M (1987) */
 };
 
 const int *sws_getCoefficients(int colorspace)
@@ -65,503 +65,548 @@ const int *sws_getCoefficients(int colorspace)
     g = (void *)(c->table_gU[U] + c->table_gV[V]);  \
     b = (void *)c->table_bU[U];
 
-#define PUTRGB(dst,src,i)            \
-    Y = src[2*i];                    \
-    dst[2*i  ] = r[Y] + g[Y] + b[Y]; \
-    Y = src[2*i+1];                  \
-    dst[2*i+1] = r[Y] + g[Y] + b[Y];
+#define PUTRGB(dst, src, i)                         \
+    Y              = src[2 * i];                    \
+    dst[2 * i]     = r[Y] + g[Y] + b[Y];            \
+    Y              = src[2 * i + 1];                \
+    dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
 
-#define PUTRGB24(dst,src,i)                                  \
-    Y = src[2*i];                                            \
-    dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
-    Y = src[2*i+1];                                          \
-    dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
+#define PUTRGB24(dst, src, i)                       \
+    Y              = src[2 * i];                    \
+    dst[6 * i + 0] = r[Y];                          \
+    dst[6 * i + 1] = g[Y];                          \
+    dst[6 * i + 2] = b[Y];                          \
+    Y              = src[2 * i + 1];                \
+    dst[6 * i + 3] = r[Y];                          \
+    dst[6 * i + 4] = g[Y];                          \
+    dst[6 * i + 5] = b[Y];
 
-#define PUTBGR24(dst,src,i)                                  \
-    Y = src[2*i];                                            \
-    dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
-    Y = src[2*i+1];                                          \
-    dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
+#define PUTBGR24(dst, src, i)                       \
+    Y              = src[2 * i];                    \
+    dst[6 * i + 0] = b[Y];                          \
+    dst[6 * i + 1] = g[Y];                          \
+    dst[6 * i + 2] = r[Y];                          \
+    Y              = src[2 * i + 1];                \
+    dst[6 * i + 3] = b[Y];                          \
+    dst[6 * i + 4] = g[Y];                          \
+    dst[6 * i + 5] = r[Y];
 
-#define PUTRGBA(dst,ysrc,asrc,i,s)                      \
-    Y = ysrc[2*i];                                      \
-    dst[2*i  ] = r[Y] + g[Y] + b[Y] + (asrc[2*i  ]<<s); \
-    Y = ysrc[2*i+1];                                    \
-    dst[2*i+1] = r[Y] + g[Y] + b[Y] + (asrc[2*i+1]<<s);
+#define PUTRGBA(dst, ysrc, asrc, i, s)                                  \
+    Y              = ysrc[2 * i];                                       \
+    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     << s);       \
+    Y              = ysrc[2 * i + 1];                                   \
+    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
 
-#define PUTRGB48(dst,src,i)             \
-    Y = src[2*i];                       \
-    dst[12*i+ 0] = dst[12*i+ 1] = r[Y]; \
-    dst[12*i+ 2] = dst[12*i+ 3] = g[Y]; \
-    dst[12*i+ 4] = dst[12*i+ 5] = b[Y]; \
-    Y = src[2*i+1];                     \
-    dst[12*i+ 6] = dst[12*i+ 7] = r[Y]; \
-    dst[12*i+ 8] = dst[12*i+ 9] = g[Y]; \
-    dst[12*i+10] = dst[12*i+11] = b[Y];
+#define PUTRGB48(dst, src, i)                       \
+    Y                = src[ 2 * i];                 \
+    dst[12 * i +  0] = dst[12 * i +  1] = r[Y];     \
+    dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
+    dst[12 * i +  4] = dst[12 * i +  5] = b[Y];     \
+    Y                = src[ 2 * i + 1];             \
+    dst[12 * i +  6] = dst[12 * i +  7] = r[Y];     \
+    dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
+    dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
 
-#define PUTBGR48(dst,src,i)             \
-    Y = src[2*i];                       \
-    dst[12*i+ 0] = dst[12*i+ 1] = b[Y]; \
-    dst[12*i+ 2] = dst[12*i+ 3] = g[Y]; \
-    dst[12*i+ 4] = dst[12*i+ 5] = r[Y]; \
-    Y = src[2*i+1];                     \
-    dst[12*i+ 6] = dst[12*i+ 7] = b[Y]; \
-    dst[12*i+ 8] = dst[12*i+ 9] = g[Y]; \
-    dst[12*i+10] = dst[12*i+11] = r[Y];
+#define PUTBGR48(dst, src, i)                       \
+    Y                = src[2 * i];                  \
+    dst[12 * i +  0] = dst[12 * i +  1] = b[Y];     \
+    dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
+    dst[12 * i +  4] = dst[12 * i +  5] = r[Y];     \
+    Y                = src[2 * i + 1];              \
+    dst[12 * i +  6] = dst[12 * i +  7] = b[Y];     \
+    dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
+    dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
 
-#define YUV2RGBFUNC(func_name, dst_type, alpha) \
-static int func_name(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, \
-                     int srcSliceH, uint8_t* dst[], int dstStride[]) \
-{\
-    int y;\
-\
-    if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
-        srcStride[1] *= 2;\
-        srcStride[2] *= 2;\
-    }\
-    for (y=0; y<srcSliceH; y+=2) {\
-        dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY  )*dstStride[0]);\
-        dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
-        dst_type av_unused *r, *b;\
-        dst_type *g;\
-        const uint8_t *py_1 = src[0] + y*srcStride[0];\
-        const uint8_t *py_2 = py_1 + srcStride[0];\
-        const uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
-        const uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
-        const uint8_t av_unused *pa_1, *pa_2;\
-        unsigned int h_size = c->dstW>>3;\
-        if (alpha) {\
-            pa_1 = src[3] + y*srcStride[3];\
-            pa_2 = pa_1 + srcStride[3];\
-        }\
-        while (h_size--) {\
-            int av_unused U, V;\
-            int Y;\
+#define YUV2RGBFUNC(func_name, dst_type, alpha)                             \
+    static int func_name(SwsContext *c, const uint8_t *src[],               \
+                         int srcStride[], int srcSliceY, int srcSliceH,     \
+                         uint8_t *dst[], int dstStride[])                   \
+    {                                                                       \
+        int y;                                                              \
+                                                                            \
+        if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {                    \
+            srcStride[1] *= 2;                                              \
+            srcStride[2] *= 2;                                              \
+        }                                                                   \
+        for (y = 0; y < srcSliceH; y += 2) {                                \
+            dst_type *dst_1 =                                               \
+                (dst_type *)(dst[0] + (y + srcSliceY)     * dstStride[0]);  \
+            dst_type *dst_2 =                                               \
+                (dst_type *)(dst[0] + (y + srcSliceY + 1) * dstStride[0]);  \
+            dst_type av_unused *r, *g, *b;                                  \
+            const uint8_t *py_1 = src[0] +  y       * srcStride[0];         \
+            const uint8_t *py_2 = py_1   +            srcStride[0];         \
+            const uint8_t *pu   = src[1] + (y >> 1) * srcStride[1];         \
+            const uint8_t *pv   = src[2] + (y >> 1) * srcStride[2];         \
+            const uint8_t av_unused *pa_1, *pa_2;                           \
+            unsigned int h_size = c->dstW >> 3;                             \
+            if (alpha) {                                                    \
+                pa_1 = src[3] + y * srcStride[3];                           \
+                pa_2 = pa_1   +     srcStride[3];                           \
+            }                                                               \
+            while (h_size--) {                                              \
+                int av_unused U, V, Y;                                      \
 
-#define ENDYUV2RGBLINE(dst_delta)\
-            pu += 4;\
-            pv += 4;\
-            py_1 += 8;\
-            py_2 += 8;\
-            dst_1 += dst_delta;\
-            dst_2 += dst_delta;\
-        }\
-        if (c->dstW & 4) {\
-            int av_unused Y, U, V;\
+#define ENDYUV2RGBLINE(dst_delta)                   \
+    pu    += 4;                                     \
+    pv    += 4;                                     \
+    py_1  += 8;                                     \
+    py_2  += 8;                                     \
+    dst_1 += dst_delta;                             \
+    dst_2 += dst_delta;                             \
+    }                                               \
+    if (c->dstW & 4) {                              \
+        int av_unused Y, U, V;                      \
 
-#define ENDYUV2RGBFUNC()\
-        }\
-    }\
-    return srcSliceH;\
-}
+#define ENDYUV2RGBFUNC()                            \
+            }                                       \
+        }                                           \
+        return srcSliceH;                           \
+    }
 
-#define CLOSEYUV2RGBFUNC(dst_delta)\
-    ENDYUV2RGBLINE(dst_delta)\
+#define CLOSEYUV2RGBFUNC(dst_delta)                 \
+    ENDYUV2RGBLINE(dst_delta)                       \
     ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
     LOADCHROMA(0);
-    PUTRGB48(dst_1,py_1,0);
-    PUTRGB48(dst_2,py_2,0);
+    PUTRGB48(dst_1, py_1, 0);
+    PUTRGB48(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB48(dst_2,py_2,1);
-    PUTRGB48(dst_1,py_1,1);
+    PUTRGB48(dst_2, py_2, 1);
+    PUTRGB48(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTRGB48(dst_1,py_1,2);
-    PUTRGB48(dst_2,py_2,2);
+    PUTRGB48(dst_1, py_1, 2);
+    PUTRGB48(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTRGB48(dst_2,py_2,3);
-    PUTRGB48(dst_1,py_1,3);
+    PUTRGB48(dst_2, py_2, 3);
+    PUTRGB48(dst_1, py_1, 3);
 ENDYUV2RGBLINE(48)
     LOADCHROMA(0);
-    PUTRGB48(dst_1,py_1,0);
-    PUTRGB48(dst_2,py_2,0);
+    PUTRGB48(dst_1, py_1, 0);
+    PUTRGB48(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB48(dst_2,py_2,1);
-    PUTRGB48(dst_1,py_1,1);
+    PUTRGB48(dst_2, py_2, 1);
+    PUTRGB48(dst_1, py_1, 1);
 ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
     LOADCHROMA(0);
-    PUTBGR48(dst_1,py_1,0);
-    PUTBGR48(dst_2,py_2,0);
+    PUTBGR48(dst_1, py_1, 0);
+    PUTBGR48(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTBGR48(dst_2,py_2,1);
-    PUTBGR48(dst_1,py_1,1);
+    PUTBGR48(dst_2, py_2, 1);
+    PUTBGR48(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTBGR48(dst_1,py_1,2);
-    PUTBGR48(dst_2,py_2,2);
+    PUTBGR48(dst_1, py_1, 2);
+    PUTBGR48(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTBGR48(dst_2,py_2,3);
-    PUTBGR48(dst_1,py_1,3);
+    PUTBGR48(dst_2, py_2, 3);
+    PUTBGR48(dst_1, py_1, 3);
 ENDYUV2RGBLINE(48)
     LOADCHROMA(0);
-    PUTBGR48(dst_1,py_1,0);
-    PUTBGR48(dst_2,py_2,0);
+    PUTBGR48(dst_1, py_1, 0);
+    PUTBGR48(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTBGR48(dst_2,py_2,1);
-    PUTBGR48(dst_1,py_1,1);
+    PUTBGR48(dst_2, py_2, 1);
+    PUTBGR48(dst_1, py_1, 1);
 ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
     LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0);
-    PUTRGB(dst_2,py_2,0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1);
-    PUTRGB(dst_1,py_1,1);
+    PUTRGB(dst_2, py_2, 1);
+    PUTRGB(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2);
-    PUTRGB(dst_2,py_2,2);
+    PUTRGB(dst_1, py_1, 2);
+    PUTRGB(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3);
-    PUTRGB(dst_1,py_1,3);
+    PUTRGB(dst_2, py_2, 3);
+    PUTRGB(dst_1, py_1, 3);
 ENDYUV2RGBLINE(8)
     LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0);
-    PUTRGB(dst_2,py_2,0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1);
-    PUTRGB(dst_1,py_1,1);
+    PUTRGB(dst_2, py_2, 1);
+    PUTRGB(dst_1, py_1, 1);
 ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
     LOADCHROMA(0);
-    PUTRGBA(dst_1,py_1,pa_1,0,24);
-    PUTRGBA(dst_2,py_2,pa_2,0,24);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
 
     LOADCHROMA(1);
-    PUTRGBA(dst_2,py_2,pa_1,1,24);
-    PUTRGBA(dst_1,py_1,pa_2,1,24);
+    PUTRGBA(dst_2, py_2, pa_1, 1, 24);
+    PUTRGBA(dst_1, py_1, pa_2, 1, 24);
 
     LOADCHROMA(2);
-    PUTRGBA(dst_1,py_1,pa_1,2,24);
-    PUTRGBA(dst_2,py_2,pa_2,2,24);
+    PUTRGBA(dst_1, py_1, pa_1, 2, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 2, 24);
 
     LOADCHROMA(3);
-    PUTRGBA(dst_2,py_2,pa_1,3,24);
-    PUTRGBA(dst_1,py_1,pa_2,3,24);
-    pa_1 += 8;\
-    pa_2 += 8;\
+    PUTRGBA(dst_2, py_2, pa_1, 3, 24);
+    PUTRGBA(dst_1, py_1, pa_2, 3, 24);
+    pa_1 += 8;
+    pa_2 += 8;
 ENDYUV2RGBLINE(8)
     LOADCHROMA(0);
-    PUTRGBA(dst_1,py_1,pa_1,0,24);
-    PUTRGBA(dst_2,py_2,pa_2,0,24);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
 
     LOADCHROMA(1);
-    PUTRGBA(dst_2,py_2,pa_1,1,24);
-    PUTRGBA(dst_1,py_1,pa_2,1,24);
+    PUTRGBA(dst_2, py_2, pa_1, 1, 24);
+    PUTRGBA(dst_1, py_1, pa_2, 1, 24);
 ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
     LOADCHROMA(0);
-    PUTRGBA(dst_1,py_1,pa_1,0,0);
-    PUTRGBA(dst_2,py_2,pa_2,0,0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
 
     LOADCHROMA(1);
-    PUTRGBA(dst_2,py_2,pa_2,1,0);
-    PUTRGBA(dst_1,py_1,pa_1,1,0);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
 
     LOADCHROMA(2);
-    PUTRGBA(dst_1,py_1,pa_1,2,0);
-    PUTRGBA(dst_2,py_2,pa_2,2,0);
+    PUTRGBA(dst_1, py_1, pa_1, 2, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 2, 0);
 
     LOADCHROMA(3);
-    PUTRGBA(dst_2,py_2,pa_2,3,0);
-    PUTRGBA(dst_1,py_1,pa_1,3,0);
-    pa_1 += 8;\
-    pa_2 += 8;\
+    PUTRGBA(dst_2, py_2, pa_2, 3, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 3, 0);
+    pa_1 += 8;
+    pa_2 += 8;
 ENDYUV2RGBLINE(8)
     LOADCHROMA(0);
-    PUTRGBA(dst_1,py_1,pa_1,0,0);
-    PUTRGBA(dst_2,py_2,pa_2,0,0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
 
     LOADCHROMA(1);
-    PUTRGBA(dst_2,py_2,pa_2,1,0);
-    PUTRGBA(dst_1,py_1,pa_1,1,0);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
 ENDYUV2RGBFUNC()
 
 YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
     LOADCHROMA(0);
-    PUTRGB24(dst_1,py_1,0);
-    PUTRGB24(dst_2,py_2,0);
+    PUTRGB24(dst_1, py_1, 0);
+    PUTRGB24(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB24(dst_2,py_2,1);
-    PUTRGB24(dst_1,py_1,1);
+    PUTRGB24(dst_2, py_2, 1);
+    PUTRGB24(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTRGB24(dst_1,py_1,2);
-    PUTRGB24(dst_2,py_2,2);
+    PUTRGB24(dst_1, py_1, 2);
+    PUTRGB24(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTRGB24(dst_2,py_2,3);
-    PUTRGB24(dst_1,py_1,3);
+    PUTRGB24(dst_2, py_2, 3);
+    PUTRGB24(dst_1, py_1, 3);
 ENDYUV2RGBLINE(24)
     LOADCHROMA(0);
-    PUTRGB24(dst_1,py_1,0);
-    PUTRGB24(dst_2,py_2,0);
+    PUTRGB24(dst_1, py_1, 0);
+    PUTRGB24(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB24(dst_2,py_2,1);
-    PUTRGB24(dst_1,py_1,1);
+    PUTRGB24(dst_2, py_2, 1);
+    PUTRGB24(dst_1, py_1, 1);
 ENDYUV2RGBFUNC()
 
 // only trivial mods from yuv2rgb_c_24_rgb
 YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
     LOADCHROMA(0);
-    PUTBGR24(dst_1,py_1,0);
-    PUTBGR24(dst_2,py_2,0);
+    PUTBGR24(dst_1, py_1, 0);
+    PUTBGR24(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTBGR24(dst_2,py_2,1);
-    PUTBGR24(dst_1,py_1,1);
+    PUTBGR24(dst_2, py_2, 1);
+    PUTBGR24(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTBGR24(dst_1,py_1,2);
-    PUTBGR24(dst_2,py_2,2);
+    PUTBGR24(dst_1, py_1, 2);
+    PUTBGR24(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTBGR24(dst_2,py_2,3);
-    PUTBGR24(dst_1,py_1,3);
+    PUTBGR24(dst_2, py_2, 3);
+    PUTBGR24(dst_1, py_1, 3);
 ENDYUV2RGBLINE(24)
     LOADCHROMA(0);
-    PUTBGR24(dst_1,py_1,0);
-    PUTBGR24(dst_2,py_2,0);
+    PUTBGR24(dst_1, py_1, 0);
+    PUTBGR24(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTBGR24(dst_2,py_2,1);
-    PUTBGR24(dst_1,py_1,1);
+    PUTBGR24(dst_2, py_2, 1);
+    PUTBGR24(dst_1, py_1, 1);
 ENDYUV2RGBFUNC()
 
 // This is exactly the same code as yuv2rgb_c_32 except for the types of
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
     LOADCHROMA(0);
-    PUTRGB(dst_1,py_1,0);
-    PUTRGB(dst_2,py_2,0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
 
     LOADCHROMA(1);
-    PUTRGB(dst_2,py_2,1);
-    PUTRGB(dst_1,py_1,1);
+    PUTRGB(dst_2, py_2, 1);
+    PUTRGB(dst_1, py_1, 1);
 
     LOADCHROMA(2);
-    PUTRGB(dst_1,py_1,2);
-    PUTRGB(dst_2,py_2,2);
+    PUTRGB(dst_1, py_1, 2);
+    PUTRGB(dst_2, py_2, 2);
 
     LOADCHROMA(3);
-    PUTRGB(dst_2,py_2,3);
-    PUTRGB(dst_1,py_1,3);
+    PUTRGB(dst_2, py_2, 3);
+    PUTRGB(dst_1, py_1, 3);
 CLOSEYUV2RGBFUNC(8)
 
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = dither_4x4_16[y&3];
-#define PUTRGB12(dst,src,i,o)                                   \
-    Y = src[2*i];                                               \
-    dst[2*i]   = r[Y+d16[0+o]] + g[Y+d16[0+o]] + b[Y+d16[0+o]]; \
-    Y = src[2*i+1];                                             \
-    dst[2*i+1] = r[Y+d16[1+o]] + g[Y+d16[1+o]] + b[Y+d16[1+o]];
+    const uint8_t *d16 = dither_4x4_16[y & 3];
+
+#define PUTRGB12(dst, src, i, o)                    \
+    Y              = src[2 * i];                    \
+    dst[2 * i]     = r[Y + d16[0 + o]] +            \
+                     g[Y + d16[0 + o]] +            \
+                     b[Y + d16[0 + o]];             \
+    Y              = src[2 * i + 1];                \
+    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
+                     g[Y + d16[1 + o]] +            \
+                     b[Y + d16[1 + o]];
 
     LOADCHROMA(0);
-    PUTRGB12(dst_1,py_1,0,0);
-    PUTRGB12(dst_2,py_2,0,0+8);
+    PUTRGB12(dst_1, py_1, 0, 0);
+    PUTRGB12(dst_2, py_2, 0, 0 + 8);
 
     LOADCHROMA(1);
-    PUTRGB12(dst_2,py_2,1,2+8);
-    PUTRGB12(dst_1,py_1,1,2);
+    PUTRGB12(dst_2, py_2, 1, 2 + 8);
+    PUTRGB12(dst_1, py_1, 1, 2);
 
     LOADCHROMA(2);
-    PUTRGB12(dst_1,py_1,2,4);
-    PUTRGB12(dst_2,py_2,2,4+8);
+    PUTRGB12(dst_1, py_1, 2, 4);
+    PUTRGB12(dst_2, py_2, 2, 4 + 8);
 
     LOADCHROMA(3);
-    PUTRGB12(dst_2,py_2,3,6+8);
-    PUTRGB12(dst_1,py_1,3,6);
+    PUTRGB12(dst_2, py_2, 3, 6 + 8);
+    PUTRGB12(dst_1, py_1, 3, 6);
 CLOSEYUV2RGBFUNC(8)
 
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
-    const uint8_t *d32 = dither_8x8_32[y&7];
-    const uint8_t *d64 = dither_8x8_73[y&7];
-#define PUTRGB8(dst,src,i,o)                                    \
-    Y = src[2*i];                                               \
-    dst[2*i]   = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
-    Y = src[2*i+1];                                             \
-    dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
+    const uint8_t *d32 = dither_8x8_32[y & 7];
+    const uint8_t *d64 = dither_8x8_73[y & 7];
+
+#define PUTRGB8(dst, src, i, o)                     \
+    Y              = src[2 * i];                    \
+    dst[2 * i]     = r[Y + d32[0 + o]] +            \
+                     g[Y + d32[0 + o]] +            \
+                     b[Y + d64[0 + o]];             \
+    Y              = src[2 * i + 1];                \
+    dst[2 * i + 1] = r[Y + d32[1 + o]] +            \
+                     g[Y + d32[1 + o]] +            \
+                     b[Y + d64[1 + o]];
 
     LOADCHROMA(0);
-    PUTRGB8(dst_1,py_1,0,0);
-    PUTRGB8(dst_2,py_2,0,0+8);
+    PUTRGB8(dst_1, py_1, 0, 0);
+    PUTRGB8(dst_2, py_2, 0, 0 + 8);
 
     LOADCHROMA(1);
-    PUTRGB8(dst_2,py_2,1,2+8);
-    PUTRGB8(dst_1,py_1,1,2);
+    PUTRGB8(dst_2, py_2, 1, 2 + 8);
+    PUTRGB8(dst_1, py_1, 1, 2);
 
     LOADCHROMA(2);
-    PUTRGB8(dst_1,py_1,2,4);
-    PUTRGB8(dst_2,py_2,2,4+8);
+    PUTRGB8(dst_1, py_1, 2, 4);
+    PUTRGB8(dst_2, py_2, 2, 4 + 8);
 
     LOADCHROMA(3);
-    PUTRGB8(dst_2,py_2,3,6+8);
-    PUTRGB8(dst_1,py_1,3,6);
+    PUTRGB8(dst_2, py_2, 3, 6 + 8);
+    PUTRGB8(dst_1, py_1, 3, 6);
 CLOSEYUV2RGBFUNC(8)
 
 YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
-    const uint8_t *d64 =  dither_8x8_73[y&7];
-    const uint8_t *d128 = dither_8x8_220[y&7];
+    const uint8_t *d64  = dither_8x8_73[y & 7];
+    const uint8_t *d128 = dither_8x8_220[y & 7];
     int acc;
 
-#define PUTRGB4D(dst,src,i,o)                                     \
-    Y = src[2*i];                                                 \
-    acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];        \
-    Y = src[2*i+1];                                               \
-    acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;  \
-    dst[i]= acc;
+#define PUTRGB4D(dst, src, i, o)                    \
+    Y      = src[2 * i];                            \
+    acc    = r[Y + d128[0 + o]] +                   \
+             g[Y +  d64[0 + o]] +                   \
+             b[Y + d128[0 + o]];                    \
+    Y      = src[2 * i + 1];                        \
+    acc   |= (r[Y + d128[1 + o]] +                  \
+              g[Y +  d64[1 + o]] +                  \
+              b[Y + d128[1 + o]]) << 4;             \
+    dst[i] = acc;
 
     LOADCHROMA(0);
-    PUTRGB4D(dst_1,py_1,0,0);
-    PUTRGB4D(dst_2,py_2,0,0+8);
+    PUTRGB4D(dst_1, py_1, 0, 0);
+    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
 
     LOADCHROMA(1);
-    PUTRGB4D(dst_2,py_2,1,2+8);
-    PUTRGB4D(dst_1,py_1,1,2);
+    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
+    PUTRGB4D(dst_1, py_1, 1, 2);
 
     LOADCHROMA(2);
-    PUTRGB4D(dst_1,py_1,2,4);
-    PUTRGB4D(dst_2,py_2,2,4+8);
+    PUTRGB4D(dst_1, py_1, 2, 4);
+    PUTRGB4D(dst_2, py_2, 2, 4 + 8);
 
     LOADCHROMA(3);
-    PUTRGB4D(dst_2,py_2,3,6+8);
-    PUTRGB4D(dst_1,py_1,3,6);
+    PUTRGB4D(dst_2, py_2, 3, 6 + 8);
+    PUTRGB4D(dst_1, py_1, 3, 6);
 CLOSEYUV2RGBFUNC(4)
 
 YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
-    const uint8_t *d64 =  dither_8x8_73[y&7];
-    const uint8_t *d128 = dither_8x8_220[y&7];
+    const uint8_t *d64  = dither_8x8_73[y & 7];
+    const uint8_t *d128 = dither_8x8_220[y & 7];
 
-#define PUTRGB4DB(dst,src,i,o)                                    \
-    Y = src[2*i];                                                 \
-    dst[2*i]   = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
-    Y = src[2*i+1];                                               \
-    dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
+#define PUTRGB4DB(dst, src, i, o)                   \
+    Y              = src[2 * i];                    \
+    dst[2 * i]     = r[Y + d128[0 + o]] +           \
+                     g[Y +  d64[0 + o]] +           \
+                     b[Y + d128[0 + o]];            \
+    Y              = src[2 * i + 1];                \
+    dst[2 * i + 1] = r[Y + d128[1 + o]] +           \
+                     g[Y +  d64[1 + o]] +           \
+                     b[Y + d128[1 + o]];
 
     LOADCHROMA(0);
-    PUTRGB4DB(dst_1,py_1,0,0);
-    PUTRGB4DB(dst_2,py_2,0,0+8);
+    PUTRGB4DB(dst_1, py_1, 0, 0);
+    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
 
     LOADCHROMA(1);
-    PUTRGB4DB(dst_2,py_2,1,2+8);
-    PUTRGB4DB(dst_1,py_1,1,2);
+    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
+    PUTRGB4DB(dst_1, py_1, 1, 2);
 
     LOADCHROMA(2);
-    PUTRGB4DB(dst_1,py_1,2,4);
-    PUTRGB4DB(dst_2,py_2,2,4+8);
+    PUTRGB4DB(dst_1, py_1, 2, 4);
+    PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
 
     LOADCHROMA(3);
-    PUTRGB4DB(dst_2,py_2,3,6+8);
-    PUTRGB4DB(dst_1,py_1,3,6);
+    PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
+    PUTRGB4DB(dst_1, py_1, 3, 6);
 CLOSEYUV2RGBFUNC(8)
 
 YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
-        const uint8_t *d128 = dither_8x8_220[y&7];
-        char out_1 = 0, out_2 = 0;
-        g= c->table_gU[128] + c->table_gV[128];
+    const uint8_t *d128 = dither_8x8_220[y & 7];
+    char out_1 = 0, out_2 = 0;
+    g = c->table_gU[128] + c->table_gV[128];
 
-#define PUTRGB1(out,src,i,o)    \
-    Y = src[2*i];               \
-    out+= out + g[Y+d128[0+o]]; \
-    Y = src[2*i+1];             \
-    out+= out + g[Y+d128[1+o]];
+#define PUTRGB1(out, src, i, o)                     \
+    Y    = src[2 * i];                              \
+    out += out + g[Y + d128[0 + o]];                \
+    Y    = src[2 * i + 1];                          \
+    out += out + g[Y + d128[1 + o]];
 
-    PUTRGB1(out_1,py_1,0,0);
-    PUTRGB1(out_2,py_2,0,0+8);
+    PUTRGB1(out_1, py_1, 0, 0);
+    PUTRGB1(out_2, py_2, 0, 0 + 8);
 
-    PUTRGB1(out_2,py_2,1,2+8);
-    PUTRGB1(out_1,py_1,1,2);
+    PUTRGB1(out_2, py_2, 1, 2 + 8);
+    PUTRGB1(out_1, py_1, 1, 2);
 
-    PUTRGB1(out_1,py_1,2,4);
-    PUTRGB1(out_2,py_2,2,4+8);
+    PUTRGB1(out_1, py_1, 2, 4);
+    PUTRGB1(out_2, py_2, 2, 4 + 8);
 
-    PUTRGB1(out_2,py_2,3,6+8);
-    PUTRGB1(out_1,py_1,3,6);
+    PUTRGB1(out_2, py_2, 3, 6 + 8);
+    PUTRGB1(out_1, py_1, 3, 6);
 
-    dst_1[0]= out_1;
-    dst_2[0]= out_2;
+    dst_1[0] = out_1;
+    dst_2[0] = out_2;
 CLOSEYUV2RGBFUNC(1)
 
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
     SwsFunc t = NULL;
 
-    if (HAVE_MMX) {
+    if (HAVE_MMX)
         t = ff_yuv2rgb_init_mmx(c);
-    } else if (HAVE_VIS) {
+    else if (HAVE_VIS)
         t = ff_yuv2rgb_init_vis(c);
-    } else if (HAVE_ALTIVEC) {
+    else if (HAVE_ALTIVEC)
         t = ff_yuv2rgb_init_altivec(c);
-    } else if (ARCH_BFIN) {
+    else if (ARCH_BFIN)
         t = ff_yuv2rgb_get_func_ptr_bfin(c);
-    }
 
     if (t)
         return t;
 
-    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
+    av_log(c, AV_LOG_WARNING,
+           "No accelerated colorspace conversion found from %s to %s.\n",
+           sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
 
     switch (c->dstFormat) {
     case PIX_FMT_BGR48BE:
-    case PIX_FMT_BGR48LE:    return yuv2rgb_c_bgr48;
+    case PIX_FMT_BGR48LE:
+        return yuv2rgb_c_bgr48;
     case PIX_FMT_RGB48BE:
-    case PIX_FMT_RGB48LE:    return yuv2rgb_c_48;
+    case PIX_FMT_RGB48LE:
+        return yuv2rgb_c_48;
     case PIX_FMT_ARGB:
-    case PIX_FMT_ABGR:       if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c;
+    case PIX_FMT_ABGR:
+        if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P)
+            return yuva2argb_c;
     case PIX_FMT_RGBA:
-    case PIX_FMT_BGRA:       return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32;
-    case PIX_FMT_RGB24:      return yuv2rgb_c_24_rgb;
-    case PIX_FMT_BGR24:      return yuv2rgb_c_24_bgr;
+    case PIX_FMT_BGRA:
+        if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P)
+            return yuva2rgba_c;
+        else
+            return yuv2rgb_c_32;
+    case PIX_FMT_RGB24:
+        return yuv2rgb_c_24_rgb;
+    case PIX_FMT_BGR24:
+        return yuv2rgb_c_24_bgr;
     case PIX_FMT_RGB565:
     case PIX_FMT_BGR565:
     case PIX_FMT_RGB555:
-    case PIX_FMT_BGR555:     return yuv2rgb_c_16;
+    case PIX_FMT_BGR555:
+        return yuv2rgb_c_16;
     case PIX_FMT_RGB444:
-    case PIX_FMT_BGR444:     return yuv2rgb_c_12_ordered_dither;
+    case PIX_FMT_BGR444:
+        return yuv2rgb_c_12_ordered_dither;
     case PIX_FMT_RGB8:
-    case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
+    case PIX_FMT_BGR8:
+        return yuv2rgb_c_8_ordered_dither;
     case PIX_FMT_RGB4:
-    case PIX_FMT_BGR4:       return yuv2rgb_c_4_ordered_dither;
+    case PIX_FMT_BGR4:
+        return yuv2rgb_c_4_ordered_dither;
     case PIX_FMT_RGB4_BYTE:
-    case PIX_FMT_BGR4_BYTE:  return yuv2rgb_c_4b_ordered_dither;
-    case PIX_FMT_MONOBLACK:  return yuv2rgb_c_1_ordered_dither;
+    case PIX_FMT_BGR4_BYTE:
+        return yuv2rgb_c_4b_ordered_dither;
+    case PIX_FMT_MONOBLACK:
+        return yuv2rgb_c_1_ordered_dither;
     default:
         assert(0);
     }
     return NULL;
 }
 
-static void fill_table(uint8_t* table[256], const int elemsize, const int inc, void *y_tab)
+static void fill_table(uint8_t *table[256], const int elemsize,
+                       const int inc, void *y_tab)
 {
     int i;
-    int64_t cb = 0;
+    int64_t cb       = 0;
     uint8_t *y_table = y_tab;
 
     y_table -= elemsize * (inc >> 9);
 
     for (i = 0; i < 256; i++) {
         table[i] = y_table + elemsize * (cb >> 16);
-        cb += inc;
+        cb      += inc;
     }
 }
 
@@ -569,44 +614,49 @@ static void fill_gv_table(int table[256], const int elemsize, const int inc)
 {
     int i;
     int64_t cb = 0;
-    int off = -(inc >> 9);
+    int off    = -(inc >> 9);
 
     for (i = 0; i < 256; i++) {
         table[i] = elemsize * (off + (cb >> 16));
-        cb += inc;
+        cb      += inc;
     }
 }
 
 static uint16_t roundToInt16(int64_t f)
 {
-    int r= (f + (1<<15))>>16;
-         if (r<-0x7FFF) return 0x8000;
-    else if (r> 0x7FFF) return 0x7FFF;
-    else                return r;
+    int r = (f + (1 << 15)) >> 16;
+
+    if (r < -0x7FFF)
+        return 0x8000;
+    else if (r > 0x7FFF)
+        return 0x7FFF;
+    else
+        return r;
 }
 
-av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
-                                     int brightness, int contrast, int saturation)
+av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
+                                     int fullRange, int brightness,
+                                     int contrast, int saturation)
 {
-    const int isRgb =      c->dstFormat==PIX_FMT_RGB32
-                        || c->dstFormat==PIX_FMT_RGB32_1
-                        || c->dstFormat==PIX_FMT_BGR24
-                        || c->dstFormat==PIX_FMT_RGB565BE
-                        || c->dstFormat==PIX_FMT_RGB565LE
-                        || c->dstFormat==PIX_FMT_RGB555BE
-                        || c->dstFormat==PIX_FMT_RGB555LE
-                        || c->dstFormat==PIX_FMT_RGB444BE
-                        || c->dstFormat==PIX_FMT_RGB444LE
-                        || c->dstFormat==PIX_FMT_RGB8
-                        || c->dstFormat==PIX_FMT_RGB4
-                        || c->dstFormat==PIX_FMT_RGB4_BYTE
-                        || c->dstFormat==PIX_FMT_MONOBLACK;
-    const int isNotNe =    c->dstFormat==PIX_FMT_NE(RGB565LE,RGB565BE)
-                        || c->dstFormat==PIX_FMT_NE(RGB555LE,RGB555BE)
-                        || c->dstFormat==PIX_FMT_NE(RGB444LE,RGB444BE)
-                        || c->dstFormat==PIX_FMT_NE(BGR565LE,BGR565BE)
-                        || c->dstFormat==PIX_FMT_NE(BGR555LE,BGR555BE)
-                        || c->dstFormat==PIX_FMT_NE(BGR444LE,BGR444BE);
+    const int isRgb = c->dstFormat == PIX_FMT_RGB32     ||
+                      c->dstFormat == PIX_FMT_RGB32_1   ||
+                      c->dstFormat == PIX_FMT_BGR24     ||
+                      c->dstFormat == PIX_FMT_RGB565BE  ||
+                      c->dstFormat == PIX_FMT_RGB565LE  ||
+                      c->dstFormat == PIX_FMT_RGB555BE  ||
+                      c->dstFormat == PIX_FMT_RGB555LE  ||
+                      c->dstFormat == PIX_FMT_RGB444BE  ||
+                      c->dstFormat == PIX_FMT_RGB444LE  ||
+                      c->dstFormat == PIX_FMT_RGB8      ||
+                      c->dstFormat == PIX_FMT_RGB4      ||
+                      c->dstFormat == PIX_FMT_RGB4_BYTE ||
+                      c->dstFormat == PIX_FMT_MONOBLACK;
+    const int isNotNe = c->dstFormat == PIX_FMT_NE(RGB565LE, RGB565BE) ||
+                        c->dstFormat == PIX_FMT_NE(RGB555LE, RGB555BE) ||
+                        c->dstFormat == PIX_FMT_NE(RGB444LE, RGB444BE) ||
+                        c->dstFormat == PIX_FMT_NE(BGR565LE, BGR565BE) ||
+                        c->dstFormat == PIX_FMT_NE(BGR555LE, BGR555BE) ||
+                        c->dstFormat == PIX_FMT_NE(BGR444LE, BGR444BE);
     const int bpp = c->dstFormatBpp;
     uint8_t *y_table;
     uint16_t *y_table16;
@@ -618,43 +668,42 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
     int64_t cbu =  inv_table[1];
     int64_t cgu = -inv_table[2];
     int64_t cgv = -inv_table[3];
-    int64_t cy  = 1<<16;
+    int64_t cy  = 1 << 16;
     int64_t oy  = 0;
-
-    int64_t yb = 0;
+    int64_t yb  = 0;
 
     if (!fullRange) {
-        cy = (cy*255) / 219;
-        oy = 16<<16;
+        cy = (cy * 255) / 219;
+        oy = 16 << 16;
     } else {
-        crv = (crv*224) / 255;
-        cbu = (cbu*224) / 255;
-        cgu = (cgu*224) / 255;
-        cgv = (cgv*224) / 255;
+        crv = (crv * 224) / 255;
+        cbu = (cbu * 224) / 255;
+        cgu = (cgu * 224) / 255;
+        cgv = (cgv * 224) / 255;
     }
 
-    cy  = (cy *contrast             ) >> 16;
-    crv = (crv*contrast * saturation) >> 32;
-    cbu = (cbu*contrast * saturation) >> 32;
-    cgu = (cgu*contrast * saturation) >> 32;
-    cgv = (cgv*contrast * saturation) >> 32;
-    oy -= 256*brightness;
+    cy   = (cy  * contrast)              >> 16;
+    crv  = (crv * contrast * saturation) >> 32;
+    cbu  = (cbu * contrast * saturation) >> 32;
+    cgu  = (cgu * contrast * saturation) >> 32;
+    cgv  = (cgv * contrast * saturation) >> 32;
+    oy  -= 256 * brightness;
 
-    c->uOffset=   0x0400040004000400LL;
-    c->vOffset=   0x0400040004000400LL;
-    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
-    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
-    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
-    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
-    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
-    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
+    c->uOffset = 0x0400040004000400LL;
+    c->vOffset = 0x0400040004000400LL;
+    c->yCoeff  = roundToInt16(cy  * 8192) * 0x0001000100010001ULL;
+    c->vrCoeff = roundToInt16(crv * 8192) * 0x0001000100010001ULL;
+    c->ubCoeff = roundToInt16(cbu * 8192) * 0x0001000100010001ULL;
+    c->vgCoeff = roundToInt16(cgv * 8192) * 0x0001000100010001ULL;
+    c->ugCoeff = roundToInt16(cgu * 8192) * 0x0001000100010001ULL;
+    c->yOffset = roundToInt16(oy  *    8) * 0x0001000100010001ULL;
 
-    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
-    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
-    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
-    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
-    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
-    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
+    c->yuv2rgb_y_coeff   = (int16_t)roundToInt16(cy  << 13);
+    c->yuv2rgb_y_offset  = (int16_t)roundToInt16(oy  <<  9);
+    c->yuv2rgb_v2r_coeff = (int16_t)roundToInt16(crv << 13);
+    c->yuv2rgb_v2g_coeff = (int16_t)roundToInt16(cgv << 13);
+    c->yuv2rgb_u2g_coeff = (int16_t)roundToInt16(cgu << 13);
+    c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13);
 
     //scale coefficients by cy
     crv = ((crv << 16) + 0x8000) / cy;
@@ -667,28 +716,28 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
     switch (bpp) {
     case 1:
         c->yuvTable = av_malloc(1024);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-110; i++) {
-            y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
-            yb += cy;
+        y_table     = c->yuvTable;
+        yb = -(384 << 16) - oy;
+        for (i = 0; i < 1024 - 110; i++) {
+            y_table[i + 110]  = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
+            yb               += cy;
         }
         fill_table(c->table_gU, 1, cgu, y_table + yoffs);
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 4:
-    case 4|128:
-        rbase = isRgb ? 3 : 0;
-        gbase = 1;
-        bbase = isRgb ? 0 : 3;
-        c->yuvTable = av_malloc(1024*3);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-110; i++) {
-            int yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i+110     ] =  (yval >> 7)       << rbase;
-            y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
-            y_table[i+110+2048] =  (yval >> 7)       << bbase;
+    case 4 | 128:
+        rbase       = isRgb ? 3 : 0;
+        gbase       = 1;
+        bbase       = isRgb ? 0 : 3;
+        c->yuvTable = av_malloc(1024 * 3);
+        y_table     = c->yuvTable;
+        yb = -(384 << 16) - oy;
+        for (i = 0; i < 1024 - 110; i++) {
+            int yval                = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table[i + 110]        = (yval >> 7)        << rbase;
+            y_table[i +  37 + 1024] = ((yval + 43) / 85) << gbase;
+            y_table[i + 110 + 2048] = (yval >> 7)        << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 1, crv, y_table + yoffs);
@@ -697,17 +746,17 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 8:
-        rbase = isRgb ? 5 : 0;
-        gbase = isRgb ? 2 : 3;
-        bbase = isRgb ? 0 : 6;
-        c->yuvTable = av_malloc(1024*3);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
-        for (i = 0; i < 1024-38; i++) {
-            int yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table[i+16     ] = ((yval + 18) / 36) << rbase;
-            y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
-            y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
+        rbase       = isRgb ? 5 : 0;
+        gbase       = isRgb ? 2 : 3;
+        bbase       = isRgb ? 0 : 6;
+        c->yuvTable = av_malloc(1024 * 3);
+        y_table     = c->yuvTable;
+        yb = -(384 << 16) - oy;
+        for (i = 0; i < 1024 - 38; i++) {
+            int yval               = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table[i + 16]        = ((yval + 18) / 36) << rbase;
+            y_table[i + 16 + 1024] = ((yval + 18) / 36) << gbase;
+            y_table[i + 37 + 2048] = ((yval + 43) / 85) << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 1, crv, y_table + yoffs);
@@ -716,21 +765,21 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 12:
-        rbase = isRgb ? 8 : 0;
-        gbase = 4;
-        bbase = isRgb ? 0 : 8;
-        c->yuvTable = av_malloc(1024*3*2);
-        y_table16 = c->yuvTable;
-        yb = -(384<<16) - oy;
+        rbase       = isRgb ? 8 : 0;
+        gbase       = 4;
+        bbase       = isRgb ? 0 : 8;
+        c->yuvTable = av_malloc(1024 * 3 * 2);
+        y_table16   = c->yuvTable;
+        yb = -(384 << 16) - oy;
         for (i = 0; i < 1024; i++) {
-            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table16[i     ] = (yval >> 4) << rbase;
-            y_table16[i+1024] = (yval >> 4) << gbase;
-            y_table16[i+2048] = (yval >> 4) << bbase;
+            uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table16[i]        = (yval >> 4) << rbase;
+            y_table16[i + 1024] = (yval >> 4) << gbase;
+            y_table16[i + 2048] = (yval >> 4) << bbase;
             yb += cy;
         }
         if (isNotNe)
-            for (i = 0; i < 1024*3; i++)
+            for (i = 0; i < 1024 * 3; i++)
                 y_table16[i] = av_bswap16(y_table16[i]);
         fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
         fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
@@ -739,21 +788,21 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
         break;
     case 15:
     case 16:
-        rbase = isRgb ? bpp - 5 : 0;
-        gbase = 5;
-        bbase = isRgb ? 0 : (bpp - 5);
-        c->yuvTable = av_malloc(1024*3*2);
-        y_table16 = c->yuvTable;
-        yb = -(384<<16) - oy;
+        rbase       = isRgb ? bpp - 5 : 0;
+        gbase       = 5;
+        bbase       = isRgb ? 0 : (bpp - 5);
+        c->yuvTable = av_malloc(1024 * 3 * 2);
+        y_table16   = c->yuvTable;
+        yb = -(384 << 16) - oy;
         for (i = 0; i < 1024; i++) {
-            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table16[i     ] = (yval >> 3)          << rbase;
-            y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
-            y_table16[i+2048] = (yval >> 3)          << bbase;
+            uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table16[i]        = (yval >> 3)          << rbase;
+            y_table16[i + 1024] = (yval >> (18 - bpp)) << gbase;
+            y_table16[i + 2048] = (yval >> 3)          << bbase;
             yb += cy;
         }
-        if(isNotNe)
-            for (i = 0; i < 1024*3; i++)
+        if (isNotNe)
+            for (i = 0; i < 1024 * 3; i++)
                 y_table16[i] = av_bswap16(y_table16[i]);
         fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
         fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
@@ -763,11 +812,11 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
     case 24:
     case 48:
         c->yuvTable = av_malloc(1024);
-        y_table = c->yuvTable;
-        yb = -(384<<16) - oy;
+        y_table     = c->yuvTable;
+        yb = -(384 << 16) - oy;
         for (i = 0; i < 1024; i++) {
-            y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
-            yb += cy;
+            y_table[i]  = av_clip_uint8((yb + 0x8000) >> 16);
+            yb         += cy;
         }
         fill_table(c->table_rV, 1, crv, y_table + yoffs);
         fill_table(c->table_gU, 1, cgu, y_table + yoffs);
@@ -775,21 +824,23 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 32:
-        base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
-        rbase = base + (isRgb ? 16 : 0);
-        gbase = base + 8;
-        bbase = base + (isRgb ? 0 : 16);
+        base      = (c->dstFormat == PIX_FMT_RGB32_1 ||
+                     c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
+        rbase     = base + (isRgb ? 16 : 0);
+        gbase     = base + 8;
+        bbase     = base + (isRgb ? 0 : 16);
         needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
         if (!needAlpha)
             abase = (base + 24) & 31;
-        c->yuvTable = av_malloc(1024*3*4);
-        y_table32 = c->yuvTable;
-        yb = -(384<<16) - oy;
+        c->yuvTable = av_malloc(1024 * 3 * 4);
+        y_table32   = c->yuvTable;
+        yb = -(384 << 16) - oy;
         for (i = 0; i < 1024; i++) {
-            unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
-            y_table32[i     ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
-            y_table32[i+1024] = yval << gbase;
-            y_table32[i+2048] = yval << bbase;
+            unsigned yval       = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table32[i]        = (yval << rbase) +
+                                  (needAlpha ? 0 : (255u << abase));
+            y_table32[i + 1024] =  yval << gbase;
+            y_table32[i + 2048] =  yval << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 4, crv, y_table32 + yoffs);