From 39da3b223fe52ec5b51e4af7d123b47b890efd8f Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Tue, 6 Mar 2012 08:57:58 +0100
Subject: [PATCH 01/11] avconv: fix counting encoded video size.

avcodec_encode_video2() return value is 0 on success, encoded frame size
is stored in the packet.
---
 avconv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/avconv.c b/avconv.c
index 8526961f7a..f9ad054276 100644
--- a/avconv.c
+++ b/avconv.c
@@ -1475,8 +1475,8 @@ static void do_video_out(AVFormatContext *s,
                     pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base);
 
                 write_frame(s, &pkt, ost);
-                *frame_size = ret;
-                video_size += ret;
+                *frame_size = pkt.size;
+                video_size += pkt.size;
 
                 /* if two pass, output log */
                 if (ost->logfile && enc->stats_out) {

From 338978a7c17d303672bcf5e035e54da362274a18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 5 Mar 2012 21:54:17 +0200
Subject: [PATCH 02/11] libx264: Allow overriding the sliced threads option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavcodec/libx264.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 3934627537..abce6a858d 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -379,6 +379,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
 
     x4->params.i_threads      = avctx->thread_count;
+    if (avctx->thread_type)
+        x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE;
 
     x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
 
@@ -536,6 +538,7 @@ static const AVCodecDefault x264_defaults[] = {
     { "coder",            "-1" },
     { "cmp",              "-1" },
     { "threads",          AV_STRINGIFY(X264_THREADS_AUTO) },
+    { "thread_type",      "0" },
     { NULL },
 };
 

From 4d851f8dcf951d380e935ef14ae01db813adfc2d Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 4 Mar 2012 16:08:48 +0100
Subject: [PATCH 03/11] cpu: add av_set_cpu_flags_mask().

---
 doc/APIchanges     |  3 +++
 libavutil/avutil.h |  2 +-
 libavutil/cpu.c    | 12 +++++++++++-
 libavutil/cpu.h    |  8 ++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index c3755299dc..8ddd4d3742 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -12,6 +12,9 @@ libavutil:   2011-04-18
 
 API changes, most recent first:
 
+2012-03-xx - xxxxxxx - lavu 51.25.0 - cpu.h
+  Add av_set_cpu_flags_mask().
+
 2012-xx-xx - lavc 54.8.0
   xxxxxxx Add av_get_exact_bits_per_sample()
   xxxxxxx Add av_get_audio_frame_duration()
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index e4219eb87f..21fc737114 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -152,7 +152,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR 24
+#define LIBAVUTIL_VERSION_MINOR 25
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 25895d6d5d..e72f7231a8 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -19,9 +19,11 @@
 #include "cpu.h"
 #include "config.h"
 
+static int cpuflags_mask, checked;
+
 int av_get_cpu_flags(void)
 {
-    static int flags, checked;
+    static int flags;
 
     if (checked)
         return flags;
@@ -30,10 +32,18 @@ int av_get_cpu_flags(void)
     if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
     if (ARCH_X86) flags = ff_get_cpu_flags_x86();
 
+    flags  &= cpuflags_mask;
     checked = 1;
+
     return flags;
 }
 
+void av_set_cpu_flags_mask(int mask)
+{
+    cpuflags_mask = mask;
+    checked       = 0;
+}
+
 #ifdef TEST
 
 #undef printf
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index df7bf4421a..361fe9866a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -48,6 +48,14 @@
  */
 int av_get_cpu_flags(void);
 
+/**
+ * Set a mask on flags returned by av_get_cpu_flags().
+ * This function is mainly useful for testing.
+ *
+ * @warning this function is not thread safe.
+ */
+void av_set_cpu_flags_mask(int mask);
+
 /* The following CPU-specific functions shall not be called directly. */
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);

From 4138cd29077de2fbca9c49e96f70d21a78e24e33 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Sun, 4 Mar 2012 16:46:45 +0100
Subject: [PATCH 04/11] avconv: add -cpuflags option for setting supported
 cpuflags.

Useful for testing.
---
 avconv.c        | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 cmdutils.c      |  7 ++----
 cmdutils.h      |  6 +++++
 doc/avconv.texi |  4 ++++
 4 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/avconv.c b/avconv.c
index f9ad054276..53a7661640 100644
--- a/avconv.c
+++ b/avconv.c
@@ -4397,6 +4397,67 @@ static int opt_deinterlace(const char *opt, const char *arg)
     return 0;
 }
 
+static int opt_cpuflags(const char *opt, const char *arg)
+{
+#define CPUFLAG_MMX2     (AV_CPU_FLAG_MMX      | AV_CPU_FLAG_MMX2)
+#define CPUFLAG_3DNOW    (AV_CPU_FLAG_3DNOW    | AV_CPU_FLAG_MMX)
+#define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW)
+#define CPUFLAG_SSE      (AV_CPU_FLAG_SSE      | CPUFLAG_MMX2)
+#define CPUFLAG_SSE2     (AV_CPU_FLAG_SSE2     | CPUFLAG_SSE)
+#define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2)
+#define CPUFLAG_SSE3     (AV_CPU_FLAG_SSE3     | CPUFLAG_SSE2)
+#define CPUFLAG_SSE3SLOW (AV_CPU_FLAG_SSE3SLOW | CPUFLAG_SSE3)
+#define CPUFLAG_SSSE3    (AV_CPU_FLAG_SSSE3    | CPUFLAG_SSE3)
+#define CPUFLAG_SSE4     (AV_CPU_FLAG_SSE4     | CPUFLAG_SSSE3)
+#define CPUFLAG_SSE42    (AV_CPU_FLAG_SSE42    | CPUFLAG_SSE4)
+#define CPUFLAG_AVX      (AV_CPU_FLAG_AVX      | CPUFLAG_SSE42)
+#define CPUFLAG_XOP      (AV_CPU_FLAG_XOP      | CPUFLAG_AVX)
+#define CPUFLAG_FMA4     (AV_CPU_FLAG_FMA4     | CPUFLAG_AVX)
+    static const AVOption cpuflags_opts[] = {
+        { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
+        { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC  },    .unit = "flags" },
+        { "mmx"     , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX      },    .unit = "flags" },
+        { "mmx2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2         },    .unit = "flags" },
+        { "sse"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE          },    .unit = "flags" },
+        { "sse2"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2         },    .unit = "flags" },
+        { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW     },    .unit = "flags" },
+        { "sse3"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3         },    .unit = "flags" },
+        { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3SLOW     },    .unit = "flags" },
+        { "ssse3"   , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSSE3        },    .unit = "flags" },
+        { "atom"    , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ATOM     },    .unit = "flags" },
+        { "sse4.1"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE4         },    .unit = "flags" },
+        { "sse4.2"  , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE42        },    .unit = "flags" },
+        { "avx"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_AVX          },    .unit = "flags" },
+        { "xop"     , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_XOP          },    .unit = "flags" },
+        { "fma4"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4         },    .unit = "flags" },
+        { "3dnow"   , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW        },    .unit = "flags" },
+        { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT     },    .unit = "flags" },
+        { NULL },
+    };
+    static const AVClass class = {
+        .class_name = "cpuflags",
+        .item_name  = av_default_item_name,
+        .option     = cpuflags_opts,
+        .version    = LIBAVUTIL_VERSION_INT,
+    };
+
+    int flags = 0, ret;
+    const AVClass *pclass = &class;
+
+    if ((ret = av_opt_eval_flags(&pclass, &cpuflags_opts[0], arg, &flags)) < 0)
+        return ret;
+
+    av_set_cpu_flags_mask(flags);
+    return 0;
+}
+
+static void parse_cpuflags(int argc, char **argv, const OptionDef *options)
+{
+    int idx = locate_option(argc, argv, options, "cpuflags");
+    if (idx && argv[idx + 1])
+        opt_cpuflags("cpuflags", argv[idx + 1]);
+}
+
 #define OFFSET(x) offsetof(OptionsContext, x)
 static const OptionDef options[] = {
     /* main options */
@@ -4446,6 +4507,7 @@ static const OptionDef options[] = {
     { "stats", OPT_BOOL, {&print_stats}, "print progress report during encoding", },
     { "attach", HAS_ARG | OPT_FUNC2, {(void*)opt_attach}, "add an attachment to the output file", "filename" },
     { "dump_attachment", HAS_ARG | OPT_STRING | OPT_SPEC, {.off = OFFSET(dump_attachment)}, "extract an attachment into a file", "filename" },
+    { "cpuflags", HAS_ARG | OPT_EXPERT, {(void*)opt_cpuflags}, "set CPU flags mask", "mask" },
 
     /* video options */
     { "vframes", HAS_ARG | OPT_VIDEO | OPT_FUNC2, {(void*)opt_video_frames}, "set the number of video frames to record", "number" },
@@ -4532,6 +4594,8 @@ int main(int argc, char **argv)
 
     show_banner();
 
+    parse_cpuflags(argc, argv, options);
+
     /* parse options */
     parse_options(&o, argc, argv, options, opt_output_file);
 
diff --git a/cmdutils.c b/cmdutils.c
index ed0813f986..336f50bfd8 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -320,11 +320,8 @@ void parse_options(void *optctx, int argc, char **argv, const OptionDef *options
     }
 }
 
-/*
- * Return index of option opt in argv or 0 if not found.
- */
-static int locate_option(int argc, char **argv, const OptionDef *options,
-                         const char *optname)
+int locate_option(int argc, char **argv, const OptionDef *options,
+                  const char *optname)
 {
     const OptionDef *po;
     int i;
diff --git a/cmdutils.h b/cmdutils.h
index 37fd4d3b67..67126493a4 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -189,6 +189,12 @@ int parse_option(void *optctx, const char *opt, const char *arg,
  */
 void parse_loglevel(int argc, char **argv, const OptionDef *options);
 
+/**
+ * Return index of option opt in argv or 0 if not found.
+ */
+int locate_option(int argc, char **argv, const OptionDef *options,
+                  const char *optname);
+
 /**
  * Check if the given stream matches a stream specifier.
  *
diff --git a/doc/avconv.texi b/doc/avconv.texi
index 25fb12fdc0..91283a4831 100644
--- a/doc/avconv.texi
+++ b/doc/avconv.texi
@@ -808,6 +808,10 @@ avconv -i file.mov -an -vn -bsf:s mov2textsub -c:s copy -f rawvideo sub.txt
 
 @item -tag[:@var{stream_specifier}] @var{codec_tag} (@emph{output,per-stream})
 Force a tag/fourcc for matching streams.
+
+@item -cpuflags mask (@emph{global})
+Set a mask that's applied to autodetected CPU flags.  This option is intended
+for testing. Do not use it unless you know what you're doing.
 @end table
 @c man end OPTIONS
 

From 018f39ef496c768f7cd580a96ae971e03c78205e Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Mon, 5 Mar 2012 08:05:56 +0100
Subject: [PATCH 05/11] FATE: add CPUFLAGS variable, mapping to -cpuflags
 avconv option.

---
 doc/fate.texi             | 4 +++-
 tests/Makefile            | 2 +-
 tests/fate-run.sh         | 5 +++--
 tests/regression-funcs.sh | 3 ++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/doc/fate.texi b/doc/fate.texi
index b4f520fef9..c1011e73f8 100644
--- a/doc/fate.texi
+++ b/doc/fate.texi
@@ -75,10 +75,12 @@ meaning only while running the regression tests.
 @item THREADS
 Specify how many threads to use while running regression tests, it is
 quite useful to detect thread-related regressions.
+@item CPUFLAGS
+Specify a mask to be applied to autodetected CPU flags.
 @end table
 
 @example
-    make V=1 SAMPLES=/var/fate/samples THREADS=2 fate
+    make V=1 SAMPLES=/var/fate/samples THREADS=2 CPUFLAGS=mmx fate
 @end example
 
 @chapter Automated Tests
diff --git a/tests/Makefile b/tests/Makefile
index 6fed995e0a..5c65237b44 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -116,7 +116,7 @@ fate: $(FATE)
 
 $(FATE): avconv$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 	@echo "TEST    $(@:fate-%=%)"
-	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)'
+	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)'
 
 fate-list:
 	@printf '%s\n' $(sort $(FATE))
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index 07cfe74ba7..a1503aecbd 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -17,6 +17,7 @@ ref=${7:-"${base}/ref/fate/${test}"}
 fuzz=$8
 threads=${9:-1}
 thread_type=${10:-frame+slice}
+cpuflags=${11:-all}
 
 outdir="tests/data/fate"
 outfile="${outdir}/${test}"
@@ -50,7 +51,7 @@ run(){
 }
 
 avconv(){
-    run avconv -nostats -threads $threads -thread_type $thread_type "$@"
+    run avconv -nostats -threads $threads -thread_type $thread_type -cpuflags $cpuflags "$@"
 }
 
 framecrc(){
@@ -76,7 +77,7 @@ pcm(){
 regtest(){
     t="${test#$2-}"
     ref=${base}/ref/$2/$t
-    ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type"
+    ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$cpuflags"
 }
 
 codectest(){
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index 3306f39941..3889129009 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -10,6 +10,7 @@ raw_src_dir=$3
 target_exec=$4
 target_path=$5
 threads=${6:-1}
+cpuflags=${8:-all}
 
 datadir="./tests/data"
 target_datadir="${target_path}/${datadir}"
@@ -43,7 +44,7 @@ echov(){
 
 . $(dirname $0)/md5.sh
 
-AVCONV_OPTS="-nostats -y"
+AVCONV_OPTS="-nostats -y -cpuflags $cpuflags"
 COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact"
 DEC_OPTS="$COMMON_OPTS -threads $threads"
 ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint"

From 2254b559cbcfc0418135f09add37c0a5866b1981 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 5 Mar 2012 12:26:42 -0800
Subject: [PATCH 06/11] swscale: make filterPos 32bit.

Fixes overflows for large image sizes.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
---
 libswscale/ppc/swscale_altivec.c  |  2 +-
 libswscale/swscale.c              | 20 ++++++++++----------
 libswscale/swscale_internal.h     | 12 ++++++------
 libswscale/utils.c                |  4 ++--
 libswscale/x86/scale.asm          | 31 +++++++++++++++++--------------
 libswscale/x86/swscale_mmx.c      |  6 +++---
 libswscale/x86/swscale_template.c |  4 ++--
 7 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 3041053096..5537707bd0 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
 
 static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
                                 const uint8_t *src, const int16_t *filter,
-                                const int16_t *filterPos, int filterSize)
+                                const int32_t *filterPos, int filterSize)
 {
     register int i;
     DECLARE_ALIGNED(16, int, tempo)[4];
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index bd909bf163..a98a4bb90f 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -61,7 +61,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
 
 static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                            const int16_t *filter,
-                           const int16_t *filterPos, int filterSize)
+                           const int32_t *filterPos, int filterSize)
 {
     int i;
     int32_t *dst = (int32_t *) _dst;
@@ -84,7 +84,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
 
 static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                            const int16_t *filter,
-                           const int16_t *filterPos, int filterSize)
+                           const int32_t *filterPos, int filterSize)
 {
     int i;
     const uint16_t *src = (const uint16_t *) _src;
@@ -105,7 +105,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
 
 // bilinear / bicubic scaling
 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
-                          const int16_t *filter, const int16_t *filterPos,
+                          const int16_t *filter, const int32_t *filterPos,
                           int filterSize)
 {
     int i;
@@ -123,7 +123,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *
 }
 
 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
-                          const int16_t *filter, const int16_t *filterPos,
+                          const int16_t *filter, const int32_t *filterPos,
                           int filterSize)
 {
     int i;
@@ -224,7 +224,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                      const uint8_t *src_in[4], int srcW, int xInc,
                                      const int16_t *hLumFilter,
-                                     const int16_t *hLumFilterPos, int hLumFilterSize,
+                                     const int32_t *hLumFilterPos, int hLumFilterSize,
                                      uint8_t *formatConvBuffer,
                                      uint32_t *pal, int isAlpha)
 {
@@ -268,7 +268,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                      const uint8_t *src_in[4],
                                      int srcW, int xInc, const int16_t *hChrFilter,
-                                     const int16_t *hChrFilterPos, int hChrFilterSize,
+                                     const int32_t *hChrFilterPos, int hChrFilterSize,
                                      uint8_t *formatConvBuffer, uint32_t *pal)
 {
     const uint8_t *src1 = src_in[1], *src2 = src_in[2];
@@ -312,10 +312,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     const int chrXInc= c->chrXInc;
     const enum PixelFormat dstFormat= c->dstFormat;
     const int flags= c->flags;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int32_t *vLumFilterPos= c->vLumFilterPos;
+    int32_t *vChrFilterPos= c->vChrFilterPos;
+    int32_t *hLumFilterPos= c->hLumFilterPos;
+    int32_t *hChrFilterPos= c->hChrFilterPos;
     int16_t *vLumFilter= c->vLumFilter;
     int16_t *vChrFilter= c->vChrFilter;
     int16_t *hLumFilter= c->hLumFilter;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index bc36826ea2..29fe4ff2c6 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -295,10 +295,10 @@ typedef struct SwsContext {
     int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
     int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
     int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
-    int16_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
-    int16_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
-    int16_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
-    int16_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
+    int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+    int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
+    int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
+    int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
     int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
     int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
     int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
@@ -508,10 +508,10 @@ typedef struct SwsContext {
     /** @{ */
     void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
                     const uint8_t *src, const int16_t *filter,
-                    const int16_t *filterPos, int filterSize);
+                    const int32_t *filterPos, int filterSize);
     void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
                     const uint8_t *src, const int16_t *filter,
-                    const int16_t *filterPos, int filterSize);
+                    const int32_t *filterPos, int filterSize);
     /** @} */
 
     /// Color range conversion function for luma plane if needed.
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 7c89a70953..51dd331117 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -180,7 +180,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
                                          dist-1.0);
 }
 
-static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
+static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc,
                       int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
                       SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal)
 {
@@ -196,7 +196,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
     emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
 
     // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end
-    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail);
+    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail);
 
     if (FFABS(xInc - 0x10000) <10) { // unscaled
         int i;
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 0d367f7a14..2387d0266b 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -38,7 +38,7 @@ SECTION .text
 ;                               (SwsContext *c, int{16,32}_t *dst,
 ;                                int dstW, const uint{8,16}_t *src,
 ;                                const int16_t *filter,
-;                                const int16_t *filterPos, int filterSize);
+;                                const int32_t *filterPos, int filterSize);
 ;
 ; Scale one horizontal line. Input is either 8-bits width or 16-bits width
 ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
@@ -53,6 +53,9 @@ SECTION .text
 cglobal hscale%1to%2_%4_%5, %6, 7, %7
 %if ARCH_X86_64
     movsxd        r2, r2d
+%define mov32 movsxd
+%else ; x86-32
+%define mov32 mov
 %endif ; x86-64
 %if %2 == 19
 %if mmsize == 8 ; mmx
@@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
 %else ; %2 == 19
     lea           r1, [r1+r2*(4>>r2shr)]
 %endif ; %2 == 15/19
-    lea           r5, [r5+r2*(2>>r2shr)]
+    lea           r5, [r5+r2*(4>>r2shr)]
     neg           r2
 
 .loop:
 %if %3 == 4 ; filterSize == 4 scaling
     ; load 2x4 or 4x4 source pixels into m0/m1
-    movsx         r0, word [r5+r2*2+0]   ; filterPos[0]
-    movsx         r6, word [r5+r2*2+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*4+0]  ; filterPos[0]
+    mov32         r6, dword [r5+r2*4+4]  ; filterPos[1]
     movlh         m0, [r3+r0*srcmul]     ; src[filterPos[0] + {0,1,2,3}]
 %if mmsize == 8
     movlh         m1, [r3+r6*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
@@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
 %else ; %1 == 8
     movd          m4, [r3+r6*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
 %endif
-    movsx         r0, word [r5+r2*2+4]   ; filterPos[2]
-    movsx         r6, word [r5+r2*2+6]   ; filterPos[3]
+    mov32         r0, dword [r5+r2*4+8]  ; filterPos[2]
+    mov32         r6, dword [r5+r2*4+12] ; filterPos[3]
     movlh         m1, [r3+r0*srcmul]     ; src[filterPos[2] + {0,1,2,3}]
 %if %1 > 8
     movhps        m1, [r3+r6*srcmul]     ; src[filterPos[3] + {0,1,2,3}]
@@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
 %endif ; mmx/sse2/ssse3/sse4
 %else ; %3 == 8, i.e. filterSize == 8 scaling
     ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
-    movsx         r0, word [r5+r2*1+0]   ; filterPos[0]
-    movsx         r6, word [r5+r2*1+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*2+0]  ; filterPos[0]
+    mov32         r6, dword [r5+r2*2+4]  ; filterPos[1]
     movbh         m0, [r3+ r0   *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
 %if mmsize == 8
     movbh         m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
@@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
     movbh         m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
 %else ; mmsize == 16
     movbh         m1, [r3+ r6   *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
-    movsx         r0, word [r5+r2*1+4]   ; filterPos[2]
-    movsx         r6, word [r5+r2*1+6]   ; filterPos[3]
+    mov32         r0, dword [r5+r2*2+8]  ; filterPos[2]
+    mov32         r6, dword [r5+r2*2+12] ; filterPos[3]
     movbh         m4, [r3+ r0   *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
     movbh         m5, [r3+ r6   *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
 %endif ; mmsize == 8/16
@@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
 %define r1x     r1
 %define filter2 r6m
 %endif ; x86-32/64
-    lea           r5, [r5+r2*2]
+    lea           r5, [r5+r2*4]
 %if %2 == 15
     lea           r1, [r1+r2*2]
 %else ; %2 == 19
@@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
     neg           r2
 
 .loop:
-    movsx         r0, word [r5+r2*2+0]   ; filterPos[0]
-    movsx        r1x, word [r5+r2*2+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*4+0]  ; filterPos[0]
+    mov32        r1x, dword [r5+r2*4+4]  ; filterPos[1]
     ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
     pxor          m4, m4
     pxor          m5, m5
@@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
     jl .innerloop
 
 %ifidn %4, X4
-    movsx        r1x, word [r5+r2*2+2]   ; filterPos[1]
+    mov32        r1x, dword [r5+r2*4+4]  ; filterPos[1]
     movlh         m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
     sub          r1x, r6                   ; and first 4 srcpx of dstpx[1]
 %if %1 > 8
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 64d5f0fc9d..99b32623cb 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -93,8 +93,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int32_t *vLumFilterPos= c->vLumFilterPos;
+    int32_t *vChrFilterPos= c->vChrFilterPos;
     int16_t *vLumFilter= c->vLumFilter;
     int16_t *vChrFilter= c->vChrFilter;
     int32_t *lumMmxFilter= c->lumMmxFilter;
@@ -204,7 +204,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
                                                 SwsContext *c, int16_t *data, \
                                                 int dstW, const uint8_t *src, \
                                                 const int16_t *filter, \
-                                                const int16_t *filterPos, int filterSize)
+                                                const int32_t *filterPos, int filterSize)
 
 #define SCALE_FUNCS(filter_n, opt) \
     SCALE_FUNC(filter_n,  8, 15, opt); \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4db3fb3086..ad2b32f27b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1376,7 +1376,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                  int dstWidth, const uint8_t *src,
                                  int srcW, int xInc)
 {
-    int16_t *filterPos = c->hLumFilterPos;
+    int32_t *filterPos = c->hLumFilterPos;
     int16_t *filter    = c->hLumFilter;
     void    *mmx2FilterCode= c->lumMmx2FilterCode;
     int i;
@@ -1472,7 +1472,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                  int dstWidth, const uint8_t *src1,
                                  const uint8_t *src2, int srcW, int xInc)
 {
-    int16_t *filterPos = c->hChrFilterPos;
+    int32_t *filterPos = c->hChrFilterPos;
     int16_t *filter    = c->hChrFilter;
     void    *mmx2FilterCode= c->chrMmx2FilterCode;
     int i;

From c23acbaed40101c677dfcfbbfe0d2c230a8e8f44 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 5 Mar 2012 16:01:19 -0800
Subject: [PATCH 07/11] Don't use ff_cropTbl[] for IDCT.

Results of IDCT can by far outreach the range of ff_cropTbl[], leading
to overreads and potentially crashes.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
---
 libavcodec/dsputil.c              | 70 ++++++++++------------
 libavcodec/h264idct_template.c    | 32 +++++-----
 libavcodec/rv34dsp.c              | 15 ++---
 libavcodec/simple_idct.c          | 19 +++---
 libavcodec/simple_idct_template.c | 34 +++++------
 libavcodec/svq3.c                 |  9 ++-
 libavcodec/vc1dsp.c               | 97 +++++++++++++------------------
 libavcodec/vp3dsp.c               | 68 +++++++++++-----------
 libavcodec/vp8dsp.c               | 18 +++---
 9 files changed, 161 insertions(+), 201 deletions(-)

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index bd10054234..29c5976596 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -366,18 +366,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                              int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<8;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
-        pixels[4] = cm[block[4]];
-        pixels[5] = cm[block[5]];
-        pixels[6] = cm[block[6]];
-        pixels[7] = cm[block[7]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
+        pixels[4] = av_clip_uint8(block[4]);
+        pixels[5] = av_clip_uint8(block[5]);
+        pixels[6] = av_clip_uint8(block[6]);
+        pixels[7] = av_clip_uint8(block[7]);
 
         pixels += line_size;
         block += 8;
@@ -388,14 +387,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
                                  int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<4;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
 
         pixels += line_size;
         block += 8;
@@ -406,12 +404,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
                                  int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<2;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
 
         pixels += line_size;
         block += 8;
@@ -443,18 +440,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                              int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<8;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
-        pixels[4] = cm[pixels[4] + block[4]];
-        pixels[5] = cm[pixels[5] + block[5]];
-        pixels[6] = cm[pixels[6] + block[6]];
-        pixels[7] = cm[pixels[7] + block[7]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
         pixels += line_size;
         block += 8;
     }
@@ -464,14 +460,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
                           int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<4;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
         pixels += line_size;
         block += 8;
     }
@@ -481,12 +476,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
                           int line_size)
 {
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     /* read the pixels */
     for(i=0;i<2;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
         pixels += line_size;
         block += 8;
     }
@@ -2733,15 +2727,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
 
 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[(block[0] + 4)>>3];
+    dest[0] = av_clip_uint8((block[0] + 4)>>3);
 }
 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
 }
 
 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index eba850ac6f..e476f89a6f 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -49,7 +49,6 @@ static const uint8_t scan8[16*3]={
 void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
 {
     int i;
-    INIT_CLIP
     pixel *dst = (pixel*)_dst;
     dctcoef *block = (dctcoef*)_block;
     stride /= sizeof(pixel);
@@ -74,16 +73,15 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
         const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
         const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
 
-        dst[i + 0*stride]= CLIP(dst[i + 0*stride] + ((z0 + z3) >> 6));
-        dst[i + 1*stride]= CLIP(dst[i + 1*stride] + ((z1 + z2) >> 6));
-        dst[i + 2*stride]= CLIP(dst[i + 2*stride] + ((z1 - z2) >> 6));
-        dst[i + 3*stride]= CLIP(dst[i + 3*stride] + ((z0 - z3) >> 6));
+        dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6));
+        dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6));
+        dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6));
+        dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6));
     }
 }
 
 void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
     int i;
-    INIT_CLIP
     pixel *dst = (pixel*)_dst;
     dctcoef *block = (dctcoef*)_block;
     stride /= sizeof(pixel);
@@ -143,14 +141,14 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
         const int b5 = (a3>>2) - a5;
         const int b7 =  a7 - (a1>>2);
 
-        dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) );
-        dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) );
-        dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) );
-        dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) );
-        dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) );
-        dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) );
-        dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) );
-        dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) );
+        dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) );
+        dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) );
+        dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) );
+        dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) );
+        dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) );
+        dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) );
+        dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) );
+        dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) );
     }
 }
 
@@ -158,13 +156,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
 void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     int i, j;
     int dc = (((dctcoef*)block)[0] + 32) >> 6;
-    INIT_CLIP
     pixel *dst = (pixel*)_dst;
     stride /= sizeof(pixel);
     for( j = 0; j < 4; j++ )
     {
         for( i = 0; i < 4; i++ )
-            dst[i] = CLIP( dst[i] + dc );
+            dst[i] = av_clip_pixel( dst[i] + dc );
         dst += stride;
     }
 }
@@ -172,13 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
 void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
     int i, j;
     int dc = (((dctcoef*)block)[0] + 32) >> 6;
-    INIT_CLIP
     pixel *dst = (pixel*)_dst;
     stride /= sizeof(pixel);
     for( j = 0; j < 8; j++ )
     {
         for( i = 0; i < 8; i++ )
-            dst[i] = CLIP( dst[i] + dc );
+            dst[i] = av_clip_pixel( dst[i] + dc );
         dst += stride;
     }
 }
diff --git a/libavcodec/rv34dsp.c b/libavcodec/rv34dsp.c
index ce5be93894..4145c4dd85 100644
--- a/libavcodec/rv34dsp.c
+++ b/libavcodec/rv34dsp.c
@@ -55,7 +55,6 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block)
  */
 static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){
     int      temp[16];
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int      i;
 
     rv34_row_transform(temp, block);
@@ -67,10 +66,10 @@ static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){
         const int z2 =  7* temp[4*1+i] - 17*temp[4*3+i];
         const int z3 = 17* temp[4*1+i] +  7*temp[4*3+i];
 
-        dst[0] = cm[ dst[0] + ( (z0 + z3) >> 10 ) ];
-        dst[1] = cm[ dst[1] + ( (z1 + z2) >> 10 ) ];
-        dst[2] = cm[ dst[2] + ( (z1 - z2) >> 10 ) ];
-        dst[3] = cm[ dst[3] + ( (z0 - z3) >> 10 ) ];
+        dst[0] = av_clip_uint8( dst[0] + ( (z0 + z3) >> 10 ) );
+        dst[1] = av_clip_uint8( dst[1] + ( (z1 + z2) >> 10 ) );
+        dst[2] = av_clip_uint8( dst[2] + ( (z1 - z2) >> 10 ) );
+        dst[3] = av_clip_uint8( dst[3] + ( (z0 - z3) >> 10 ) );
 
         dst  += stride;
     }
@@ -103,15 +102,13 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
 
 static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc)
 {
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int i, j;
 
-    cm += (13*13*dc + 0x200) >> 10;
-
+    dc = (13*13*dc + 0x200) >> 10;
     for (i = 0; i < 4; i++)
     {
         for (j = 0; j < 4; j++)
-            dst[j] = cm[ dst[j] ];
+            dst[j] = av_clip_uint8( dst[j] + dc );
 
         dst += stride;
     }
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 0c75261079..5812a87705 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -53,7 +53,6 @@
 static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
 {
     int c0, c1, c2, c3, a0, a1, a2, a3;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     a0 = col[8*0];
     a1 = col[8*2];
@@ -63,13 +62,13 @@ static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col
     c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
     c1 = a1 * C1 + a3 * C2;
     c3 = a1 * C2 - a3 * C1;
-    dest[0] = cm[(c0 + c1) >> C_SHIFT];
+    dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT);
     dest += line_size;
-    dest[0] = cm[(c2 + c3) >> C_SHIFT];
+    dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT);
     dest += line_size;
-    dest[0] = cm[(c2 - c3) >> C_SHIFT];
+    dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT);
     dest += line_size;
-    dest[0] = cm[(c0 - c1) >> C_SHIFT];
+    dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT);
 }
 
 #define BF(k) \
@@ -133,7 +132,6 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
 {
     int c0, c1, c2, c3, a0, a1, a2, a3;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     a0 = col[8*0];
     a1 = col[8*1];
@@ -143,13 +141,13 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
     c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
     c1 = a1 * C1 + a3 * C2;
     c3 = a1 * C2 - a3 * C1;
-    dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
+    dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT));
     dest += line_size;
-    dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
+    dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT));
     dest += line_size;
-    dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
+    dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT));
     dest += line_size;
-    dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
+    dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT));
 }
 
 #define RN_SHIFT 15
@@ -161,7 +159,6 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col
 static inline void idct4row(DCTELEM *row)
 {
     int c0, c1, c2, c3, a0, a1, a2, a3;
-    //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     a0 = row[0];
     a1 = row[1];
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index fdec3aab2b..3c855e3825 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -224,50 +224,48 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
                                           DCTELEM *col)
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
-    INIT_CLIP;
 
     IDCT_COLS;
 
-    dest[0] = CLIP((a0 + b0) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a1 + b1) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a2 + b2) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a3 + b3) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a3 - b3) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a2 - b2) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a1 - b1) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
     dest += line_size;
-    dest[0] = CLIP((a0 - b0) >> COL_SHIFT);
+    dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
 }
 
 static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
                                           DCTELEM *col)
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
-    INIT_CLIP;
 
     IDCT_COLS;
 
-    dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
     dest += line_size;
-    dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT));
+    dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
 }
 
 static inline void FUNC(idctSparseCol)(DCTELEM *col)
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index e157061f64..caafc49969 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -173,7 +173,6 @@ void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp,
 {
     const int qmul = svq3_dequant_coeff[qp];
     int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     if (dc) {
         dc = 13*13*((dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2));
@@ -199,10 +198,10 @@ void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp,
         const int z3 = 17* block[i + 4*1] +  7*block[i + 4*3];
         const int rr = (dc + 0x80000);
 
-        dst[i + stride*0] = cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
-        dst[i + stride*1] = cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
-        dst[i + stride*2] = cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
-        dst[i + stride*3] = cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
+        dst[i + stride*0] = av_clip_uint8( dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) );
+        dst[i + stride*1] = av_clip_uint8( dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) );
+        dst[i + stride*2] = av_clip_uint8( dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) );
+        dst[i + stride*3] = av_clip_uint8( dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) );
     }
 }
 
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 9bd107cdd9..b40824b86a 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -139,8 +139,6 @@ static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right)
  * @see 8.6
  */
 static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
     int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
     int a0_sign = a0 >> 31;        /* Store sign */
     a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
@@ -163,8 +161,8 @@ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
                 else{
                     d = FFMIN(d, clip);
                     d = (d ^ d_sign) - d_sign;          /* Restore sign */
-                    src[-1*stride] = cm[src[-1*stride] - d];
-                    src[ 0*stride] = cm[src[ 0*stride] + d];
+                    src[-1*stride] = av_clip_uint8(src[-1*stride] - d);
+                    src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d);
                 }
                 return 1;
             }
@@ -234,19 +232,17 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     int dc = block[0];
-    const uint8_t *cm;
     dc = (3 * dc +  1) >> 1;
     dc = (3 * dc + 16) >> 5;
-    cm = ff_cropTbl + MAX_NEG_CROP + dc;
     for(i = 0; i < 8; i++){
-        dest[0] = cm[dest[0]];
-        dest[1] = cm[dest[1]];
-        dest[2] = cm[dest[2]];
-        dest[3] = cm[dest[3]];
-        dest[4] = cm[dest[4]];
-        dest[5] = cm[dest[5]];
-        dest[6] = cm[dest[6]];
-        dest[7] = cm[dest[7]];
+        dest[0] = av_clip_uint8(dest[0] + dc);
+        dest[1] = av_clip_uint8(dest[1] + dc);
+        dest[2] = av_clip_uint8(dest[2] + dc);
+        dest[3] = av_clip_uint8(dest[3] + dc);
+        dest[4] = av_clip_uint8(dest[4] + dc);
+        dest[5] = av_clip_uint8(dest[5] + dc);
+        dest[6] = av_clip_uint8(dest[6] + dc);
+        dest[7] = av_clip_uint8(dest[7] + dc);
         dest += linesize;
     }
 }
@@ -326,19 +322,17 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     int dc = block[0];
-    const uint8_t *cm;
     dc = ( 3 * dc +  1) >> 1;
     dc = (17 * dc + 64) >> 7;
-    cm = ff_cropTbl + MAX_NEG_CROP + dc;
     for(i = 0; i < 4; i++){
-        dest[0] = cm[dest[0]];
-        dest[1] = cm[dest[1]];
-        dest[2] = cm[dest[2]];
-        dest[3] = cm[dest[3]];
-        dest[4] = cm[dest[4]];
-        dest[5] = cm[dest[5]];
-        dest[6] = cm[dest[6]];
-        dest[7] = cm[dest[7]];
+        dest[0] = av_clip_uint8(dest[0] + dc);
+        dest[1] = av_clip_uint8(dest[1] + dc);
+        dest[2] = av_clip_uint8(dest[2] + dc);
+        dest[3] = av_clip_uint8(dest[3] + dc);
+        dest[4] = av_clip_uint8(dest[4] + dc);
+        dest[5] = av_clip_uint8(dest[5] + dc);
+        dest[6] = av_clip_uint8(dest[6] + dc);
+        dest[7] = av_clip_uint8(dest[7] + dc);
         dest += linesize;
     }
 }
@@ -348,7 +342,6 @@ static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
     int i;
     register int t1,t2,t3,t4,t5,t6,t7,t8;
     DCTELEM *src, *dst;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     src = block;
     dst = block;
@@ -388,10 +381,10 @@ static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
         t3 = 22 * src[ 8] + 10 * src[24];
         t4 = 22 * src[24] - 10 * src[ 8];
 
-        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
-        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
-        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
-        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];
+        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7));
+        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7));
+        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7));
+        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7));
 
         src ++;
         dest++;
@@ -404,15 +397,13 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     int dc = block[0];
-    const uint8_t *cm;
     dc = (17 * dc +  4) >> 3;
     dc = (12 * dc + 64) >> 7;
-    cm = ff_cropTbl + MAX_NEG_CROP + dc;
     for(i = 0; i < 8; i++){
-        dest[0] = cm[dest[0]];
-        dest[1] = cm[dest[1]];
-        dest[2] = cm[dest[2]];
-        dest[3] = cm[dest[3]];
+        dest[0] = av_clip_uint8(dest[0] + dc);
+        dest[1] = av_clip_uint8(dest[1] + dc);
+        dest[2] = av_clip_uint8(dest[2] + dc);
+        dest[3] = av_clip_uint8(dest[3] + dc);
         dest += linesize;
     }
 }
@@ -422,7 +413,6 @@ static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
     int i;
     register int t1,t2,t3,t4,t5,t6,t7,t8;
     DCTELEM *src, *dst;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     src = block;
     dst = block;
@@ -458,14 +448,14 @@ static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
         t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
         t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
 
-        dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)];
-        dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)];
-        dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)];
-        dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)];
-        dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)];
-        dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)];
-        dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)];
-        dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)];
+        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t5 + t1) >> 7));
+        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t6 + t2) >> 7));
+        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t7 + t3) >> 7));
+        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t8 + t4) >> 7));
+        dest[4*linesize] = av_clip_uint8(dest[4*linesize] + ((t8 - t4 + 1) >> 7));
+        dest[5*linesize] = av_clip_uint8(dest[5*linesize] + ((t7 - t3 + 1) >> 7));
+        dest[6*linesize] = av_clip_uint8(dest[6*linesize] + ((t6 - t2 + 1) >> 7));
+        dest[7*linesize] = av_clip_uint8(dest[7*linesize] + ((t5 - t1 + 1) >> 7));
 
         src ++;
         dest++;
@@ -478,15 +468,13 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     int dc = block[0];
-    const uint8_t *cm;
     dc = (17 * dc +  4) >> 3;
     dc = (17 * dc + 64) >> 7;
-    cm = ff_cropTbl + MAX_NEG_CROP + dc;
     for(i = 0; i < 4; i++){
-        dest[0] = cm[dest[0]];
-        dest[1] = cm[dest[1]];
-        dest[2] = cm[dest[2]];
-        dest[3] = cm[dest[3]];
+        dest[0] = av_clip_uint8(dest[0] + dc);
+        dest[1] = av_clip_uint8(dest[1] + dc);
+        dest[2] = av_clip_uint8(dest[2] + dc);
+        dest[3] = av_clip_uint8(dest[3] + dc);
         dest += linesize;
     }
 }
@@ -496,7 +484,6 @@ static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
     int i;
     register int t1,t2,t3,t4;
     DCTELEM *src, *dst;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     src = block;
     dst = block;
@@ -522,10 +509,10 @@ static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
         t3 = 22 * src[ 8] + 10 * src[24];
         t4 = 22 * src[24] - 10 * src[ 8];
 
-        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
-        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
-        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
-        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];
+        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7));
+        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7));
+        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7));
+        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7));
 
         src ++;
         dest++;
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index baa22a5519..438ae76b57 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -41,7 +41,6 @@
 static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
 {
     int16_t *ip = input;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
     int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
     int Ed, Gd, Add, Bdd, Fd, Hd;
@@ -147,29 +146,29 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
                 ip[5*8] = (Fd + Bdd ) >> 4;
                 ip[6*8] = (Fd - Bdd ) >> 4;
             }else if(type==1){
-                dst[0*stride] = cm[(Gd + Cd )  >> 4];
-                dst[7*stride] = cm[(Gd - Cd )  >> 4];
+                dst[0*stride] = av_clip_uint8((Gd + Cd )  >> 4);
+                dst[7*stride] = av_clip_uint8((Gd - Cd )  >> 4);
 
-                dst[1*stride] = cm[(Add + Hd ) >> 4];
-                dst[2*stride] = cm[(Add - Hd ) >> 4];
+                dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
+                dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
 
-                dst[3*stride] = cm[(Ed + Dd )  >> 4];
-                dst[4*stride] = cm[(Ed - Dd )  >> 4];
+                dst[3*stride] = av_clip_uint8((Ed + Dd )  >> 4);
+                dst[4*stride] = av_clip_uint8((Ed - Dd )  >> 4);
 
-                dst[5*stride] = cm[(Fd + Bdd ) >> 4];
-                dst[6*stride] = cm[(Fd - Bdd ) >> 4];
+                dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
+                dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
             }else{
-                dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd )  >> 4)];
-                dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd )  >> 4)];
+                dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd )  >> 4));
+                dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd )  >> 4));
 
-                dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)];
-                dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)];
+                dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
+                dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
 
-                dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd )  >> 4)];
-                dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd )  >> 4)];
+                dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd )  >> 4));
+                dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd )  >> 4));
 
-                dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)];
-                dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)];
+                dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
+                dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
             }
 
         } else {
@@ -190,18 +189,18 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
                 dst[4*stride]=
                 dst[5*stride]=
                 dst[6*stride]=
-                dst[7*stride]= cm[128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)];
+                dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
             }else{
                 if(ip[0*8]){
                     int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
-                    dst[0*stride] = cm[dst[0*stride] + v];
-                    dst[1*stride] = cm[dst[1*stride] + v];
-                    dst[2*stride] = cm[dst[2*stride] + v];
-                    dst[3*stride] = cm[dst[3*stride] + v];
-                    dst[4*stride] = cm[dst[4*stride] + v];
-                    dst[5*stride] = cm[dst[5*stride] + v];
-                    dst[6*stride] = cm[dst[6*stride] + v];
-                    dst[7*stride] = cm[dst[7*stride] + v];
+                    dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
+                    dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
+                    dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
+                    dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
+                    dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
+                    dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
+                    dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
+                    dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
                 }
             }
         }
@@ -225,17 +224,16 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*
 
 void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
     int i, dc = (block[0] + 15) >> 5;
-    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
 
     for(i = 0; i < 8; i++){
-        dest[0] = cm[dest[0]];
-        dest[1] = cm[dest[1]];
-        dest[2] = cm[dest[2]];
-        dest[3] = cm[dest[3]];
-        dest[4] = cm[dest[4]];
-        dest[5] = cm[dest[5]];
-        dest[6] = cm[dest[6]];
-        dest[7] = cm[dest[7]];
+        dest[0] = av_clip_uint8(dest[0] + dc);
+        dest[1] = av_clip_uint8(dest[1] + dc);
+        dest[2] = av_clip_uint8(dest[2] + dc);
+        dest[3] = av_clip_uint8(dest[3] + dc);
+        dest[4] = av_clip_uint8(dest[4] + dc);
+        dest[5] = av_clip_uint8(dest[5] + dc);
+        dest[6] = av_clip_uint8(dest[6] + dc);
+        dest[7] = av_clip_uint8(dest[7] + dc);
         dest += line_size;
     }
 }
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 86dc42ed37..f6c944328a 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -80,7 +80,6 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16])
 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
 {
     int i, t0, t1, t2, t3;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     DCTELEM tmp[16];
 
     for (i = 0; i < 4; i++) {
@@ -105,10 +104,10 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
         t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
         t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
 
-        dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
-        dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
-        dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
-        dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
+        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
+        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
+        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
+        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
         dst += stride;
     }
 }
@@ -116,14 +115,13 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride)
 {
     int i, dc = (block[0] + 4) >> 3;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
     block[0] = 0;
 
     for (i = 0; i < 4; i++) {
-        dst[0] = cm[dst[0]];
-        dst[1] = cm[dst[1]];
-        dst[2] = cm[dst[2]];
-        dst[3] = cm[dst[3]];
+        dst[0] = av_clip_uint8(dst[0] + dc);
+        dst[1] = av_clip_uint8(dst[1] + dc);
+        dst[2] = av_clip_uint8(dst[2] + dc);
+        dst[3] = av_clip_uint8(dst[3] + dc);
         dst += stride;
     }
 }

From 11b940a1a8e7e5d5b212935a3ce78aeda577f5f2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Mon, 5 Mar 2012 17:03:32 -0800
Subject: [PATCH 08/11] svq3: protect against negative quantizers.

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
---
 libavcodec/svq3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index caafc49969..99c1dec07c 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -650,7 +650,7 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type)
     if (IS_INTRA16x16(mb_type) || (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
         s->qscale += svq3_get_se_golomb(&s->gb);
 
-        if (s->qscale > 31){
+        if (s->qscale > 31u){
             av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
             return -1;
         }

From 6193ff68549ecbaf1a4d63a0e06964ec580ac620 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 6 Mar 2012 10:27:05 -0800
Subject: [PATCH 09/11] error_resilience: initialize s->block_index[].

Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind
CC: libav-stable@libav.org
---
 libavcodec/error_resilience.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 0fab8be1fd..d14adb4437 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -419,9 +419,14 @@ static void guess_mv(MpegEncContext *s)
     if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
         num_avail <= mb_width / 2) {
         for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+            s->mb_x = 0;
+            s->mb_y = mb_y;
+            ff_init_block_index(s);
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                 const int mb_xy = mb_x + mb_y * s->mb_stride;
 
+                ff_update_block_index(s);
+
                 if (IS_INTRA(s->current_picture.f.mb_type[mb_xy]))
                     continue;
                 if (!(s->error_status_table[mb_xy] & ER_MV_ERROR))
@@ -456,6 +461,9 @@ static void guess_mv(MpegEncContext *s)
 
             changed = 0;
             for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+                s->mb_x = 0;
+                s->mb_y = mb_y;
+                ff_init_block_index(s);
                 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                     const int mb_xy        = mb_x + mb_y * s->mb_stride;
                     int mv_predictor[8][2] = { { 0 } };
@@ -467,6 +475,8 @@ static void guess_mv(MpegEncContext *s)
                     const int mot_index    = (mb_x + mb_y * mot_stride) * mot_step;
                     int prev_x, prev_y, prev_ref;
 
+                    ff_update_block_index(s);
+
                     if ((mb_x ^ mb_y ^ pass) & 1)
                         continue;
 
@@ -1072,11 +1082,16 @@ void ff_er_frame_end(MpegEncContext *s)
 
     /* handle inter blocks with damaged AC */
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        s->mb_x = 0;
+        s->mb_y = mb_y;
+        ff_init_block_index(s);
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
             const int mb_xy   = mb_x + mb_y * s->mb_stride;
             const int mb_type = s->current_picture.f.mb_type[mb_xy];
             int dir           = !s->last_picture.f.data[0];
 
+            ff_update_block_index(s);
+
             error = s->error_status_table[mb_xy];
 
             if (IS_INTRA(mb_type))
@@ -1114,11 +1129,16 @@ void ff_er_frame_end(MpegEncContext *s)
     /* guess MVs */
     if (s->pict_type == AV_PICTURE_TYPE_B) {
         for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+            s->mb_x = 0;
+            s->mb_y = mb_y;
+            ff_init_block_index(s);
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                 int       xy      = mb_x * 2 + mb_y * 2 * s->b8_stride;
                 const int mb_xy   = mb_x + mb_y * s->mb_stride;
                 const int mb_type = s->current_picture.f.mb_type[mb_xy];
 
+                ff_update_block_index(s);
+
                 error = s->error_status_table[mb_xy];
 
                 if (IS_INTRA(mb_type))

From a9c5b6f6020c3fad6bf59985518e08453e002cdf Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 6 Mar 2012 12:47:23 -0800
Subject: [PATCH 10/11] cpu: initialize mask to -1, so that by default,
 optimizations are used.

---
 libavutil/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index e72f7231a8..c44075be29 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -19,7 +19,7 @@
 #include "cpu.h"
 #include "config.h"
 
-static int cpuflags_mask, checked;
+static int cpuflags_mask = -1, checked;
 
 int av_get_cpu_flags(void)
 {

From b5161908e06b4497bf663510fb495ba97a6fd2b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Tue, 6 Mar 2012 22:11:30 +0100
Subject: [PATCH 11/11] SBR DSP: fix SSE code to not use SSE2 instructions.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

movq from SSE register _to_ memory is an SSE2 instruction.
Use the SSE movlps function instead that does the same thing.

Signed-off-by: Reimar DÃ¶ffinger <Reimar.Doeffinger@gmx.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
---
 libavcodec/x86/sbrdsp.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c165c52ca4..c3b559bb15 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -104,7 +104,7 @@ cglobal sbr_hf_g_filt, 5, 6, 5
     movq        m2, [r1]
     punpckldq   m0, m0
     mulps       m2, m0
-    movq      [r0], m2
+    movlps    [r0], m2
     add         r0, 8
     add         r2, 4
     add         r1, STEP