From 39da3b223fe52ec5b51e4af7d123b47b890efd8f Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Tue, 6 Mar 2012 08:57:58 +0100 Subject: [PATCH 01/11] avconv: fix counting encoded video size. avcodec_encode_video2() return value is 0 on success, encoded frame size is stored in the packet. --- avconv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avconv.c b/avconv.c index 8526961f7a..f9ad054276 100644 --- a/avconv.c +++ b/avconv.c @@ -1475,8 +1475,8 @@ static void do_video_out(AVFormatContext *s, pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base); write_frame(s, &pkt, ost); - *frame_size = ret; - video_size += ret; + *frame_size = pkt.size; + video_size += pkt.size; /* if two pass, output log */ if (ost->logfile && enc->stats_out) { From 338978a7c17d303672bcf5e035e54da362274a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 5 Mar 2012 21:54:17 +0200 Subject: [PATCH 02/11] libx264: Allow overriding the sliced threads option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Martin Storsjö --- libavcodec/libx264.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index 3934627537..abce6a858d 100644 --- a/libavcodec/libx264.c +++ b/libavcodec/libx264.c @@ -379,6 +379,8 @@ static av_cold int X264_init(AVCodecContext *avctx) x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR; x4->params.i_threads = avctx->thread_count; + if (avctx->thread_type) + x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE; x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT; @@ -536,6 +538,7 @@ static const AVCodecDefault x264_defaults[] = { { "coder", "-1" }, { "cmp", "-1" }, { "threads", AV_STRINGIFY(X264_THREADS_AUTO) }, + { "thread_type", "0" }, { NULL }, }; From 4d851f8dcf951d380e935ef14ae01db813adfc2d Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 4 Mar 2012 16:08:48 +0100 Subject: [PATCH 03/11] cpu: add av_set_cpu_flags_mask(). --- doc/APIchanges | 3 +++ libavutil/avutil.h | 2 +- libavutil/cpu.c | 12 +++++++++++- libavutil/cpu.h | 8 ++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/APIchanges b/doc/APIchanges index c3755299dc..8ddd4d3742 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -12,6 +12,9 @@ libavutil: 2011-04-18 API changes, most recent first: +2012-03-xx - xxxxxxx - lavu 51.25.0 - cpu.h + Add av_set_cpu_flags_mask(). + 2012-xx-xx - lavc 54.8.0 xxxxxxx Add av_get_exact_bits_per_sample() xxxxxxx Add av_get_audio_frame_duration() diff --git a/libavutil/avutil.h b/libavutil/avutil.h index e4219eb87f..21fc737114 100644 --- a/libavutil/avutil.h +++ b/libavutil/avutil.h @@ -152,7 +152,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 51 -#define LIBAVUTIL_VERSION_MINOR 24 +#define LIBAVUTIL_VERSION_MINOR 25 #define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 25895d6d5d..e72f7231a8 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -19,9 +19,11 @@ #include "cpu.h" #include "config.h" +static int cpuflags_mask, checked; + int av_get_cpu_flags(void) { - static int flags, checked; + static int flags; if (checked) return flags; @@ -30,10 +32,18 @@ int av_get_cpu_flags(void) if (ARCH_PPC) flags = ff_get_cpu_flags_ppc(); if (ARCH_X86) flags = ff_get_cpu_flags_x86(); + flags &= cpuflags_mask; checked = 1; + return flags; } +void av_set_cpu_flags_mask(int mask) +{ + cpuflags_mask = mask; + checked = 0; +} + #ifdef TEST #undef printf diff --git a/libavutil/cpu.h b/libavutil/cpu.h index df7bf4421a..361fe9866a 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -48,6 +48,14 @@ */ int av_get_cpu_flags(void); +/** + * Set a mask on flags returned by av_get_cpu_flags(). + * This function is mainly useful for testing. + * + * @warning this function is not thread safe. + */ +void av_set_cpu_flags_mask(int mask); + /* The following CPU-specific functions shall not be called directly. */ int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); From 4138cd29077de2fbca9c49e96f70d21a78e24e33 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sun, 4 Mar 2012 16:46:45 +0100 Subject: [PATCH 04/11] avconv: add -cpuflags option for setting supported cpuflags. Useful for testing. --- avconv.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ cmdutils.c | 7 ++---- cmdutils.h | 6 +++++ doc/avconv.texi | 4 ++++ 4 files changed, 76 insertions(+), 5 deletions(-) diff --git a/avconv.c b/avconv.c index f9ad054276..53a7661640 100644 --- a/avconv.c +++ b/avconv.c @@ -4397,6 +4397,67 @@ static int opt_deinterlace(const char *opt, const char *arg) return 0; } +static int opt_cpuflags(const char *opt, const char *arg) +{ +#define CPUFLAG_MMX2 (AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMX2) +#define CPUFLAG_3DNOW (AV_CPU_FLAG_3DNOW | AV_CPU_FLAG_MMX) +#define CPUFLAG_3DNOWEXT (AV_CPU_FLAG_3DNOWEXT | CPUFLAG_3DNOW) +#define CPUFLAG_SSE (AV_CPU_FLAG_SSE | CPUFLAG_MMX2) +#define CPUFLAG_SSE2 (AV_CPU_FLAG_SSE2 | CPUFLAG_SSE) +#define CPUFLAG_SSE2SLOW (AV_CPU_FLAG_SSE2SLOW | CPUFLAG_SSE2) +#define CPUFLAG_SSE3 (AV_CPU_FLAG_SSE3 | CPUFLAG_SSE2) +#define CPUFLAG_SSE3SLOW (AV_CPU_FLAG_SSE3SLOW | CPUFLAG_SSE3) +#define CPUFLAG_SSSE3 (AV_CPU_FLAG_SSSE3 | CPUFLAG_SSE3) +#define CPUFLAG_SSE4 (AV_CPU_FLAG_SSE4 | CPUFLAG_SSSE3) +#define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) +#define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) +#define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) +#define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) + static const AVOption cpuflags_opts[] = { + { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, + { "altivec" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ALTIVEC }, .unit = "flags" }, + { "mmx" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_MMX }, .unit = "flags" }, + { "mmx2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_MMX2 }, .unit = "flags" }, + { "sse" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE }, .unit = "flags" }, + { "sse2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2 }, .unit = "flags" }, + { "sse2slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE2SLOW }, .unit = "flags" }, + { "sse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3 }, .unit = "flags" }, + { "sse3slow", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE3SLOW }, .unit = "flags" }, + { "ssse3" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSSE3 }, .unit = "flags" }, + { "atom" , NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ATOM }, .unit = "flags" }, + { "sse4.1" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE4 }, .unit = "flags" }, + { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_SSE42 }, .unit = "flags" }, + { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_AVX }, .unit = "flags" }, + { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_XOP }, .unit = "flags" }, + { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4 }, .unit = "flags" }, + { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW }, .unit = "flags" }, + { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT }, .unit = "flags" }, + { NULL }, + }; + static const AVClass class = { + .class_name = "cpuflags", + .item_name = av_default_item_name, + .option = cpuflags_opts, + .version = LIBAVUTIL_VERSION_INT, + }; + + int flags = 0, ret; + const AVClass *pclass = &class; + + if ((ret = av_opt_eval_flags(&pclass, &cpuflags_opts[0], arg, &flags)) < 0) + return ret; + + av_set_cpu_flags_mask(flags); + return 0; +} + +static void parse_cpuflags(int argc, char **argv, const OptionDef *options) +{ + int idx = locate_option(argc, argv, options, "cpuflags"); + if (idx && argv[idx + 1]) + opt_cpuflags("cpuflags", argv[idx + 1]); +} + #define OFFSET(x) offsetof(OptionsContext, x) static const OptionDef options[] = { /* main options */ @@ -4446,6 +4507,7 @@ static const OptionDef options[] = { { "stats", OPT_BOOL, {&print_stats}, "print progress report during encoding", }, { "attach", HAS_ARG | OPT_FUNC2, {(void*)opt_attach}, "add an attachment to the output file", "filename" }, { "dump_attachment", HAS_ARG | OPT_STRING | OPT_SPEC, {.off = OFFSET(dump_attachment)}, "extract an attachment into a file", "filename" }, + { "cpuflags", HAS_ARG | OPT_EXPERT, {(void*)opt_cpuflags}, "set CPU flags mask", "mask" }, /* video options */ { "vframes", HAS_ARG | OPT_VIDEO | OPT_FUNC2, {(void*)opt_video_frames}, "set the number of video frames to record", "number" }, @@ -4532,6 +4594,8 @@ int main(int argc, char **argv) show_banner(); + parse_cpuflags(argc, argv, options); + /* parse options */ parse_options(&o, argc, argv, options, opt_output_file); diff --git a/cmdutils.c b/cmdutils.c index ed0813f986..336f50bfd8 100644 --- a/cmdutils.c +++ b/cmdutils.c @@ -320,11 +320,8 @@ void parse_options(void *optctx, int argc, char **argv, const OptionDef *options } } -/* - * Return index of option opt in argv or 0 if not found. - */ -static int locate_option(int argc, char **argv, const OptionDef *options, - const char *optname) +int locate_option(int argc, char **argv, const OptionDef *options, + const char *optname) { const OptionDef *po; int i; diff --git a/cmdutils.h b/cmdutils.h index 37fd4d3b67..67126493a4 100644 --- a/cmdutils.h +++ b/cmdutils.h @@ -189,6 +189,12 @@ int parse_option(void *optctx, const char *opt, const char *arg, */ void parse_loglevel(int argc, char **argv, const OptionDef *options); +/** + * Return index of option opt in argv or 0 if not found. + */ +int locate_option(int argc, char **argv, const OptionDef *options, + const char *optname); + /** * Check if the given stream matches a stream specifier. * diff --git a/doc/avconv.texi b/doc/avconv.texi index 25fb12fdc0..91283a4831 100644 --- a/doc/avconv.texi +++ b/doc/avconv.texi @@ -808,6 +808,10 @@ avconv -i file.mov -an -vn -bsf:s mov2textsub -c:s copy -f rawvideo sub.txt @item -tag[:@var{stream_specifier}] @var{codec_tag} (@emph{output,per-stream}) Force a tag/fourcc for matching streams. + +@item -cpuflags mask (@emph{global}) +Set a mask that's applied to autodetected CPU flags. This option is intended +for testing. Do not use it unless you know what you're doing. @end table @c man end OPTIONS From 018f39ef496c768f7cd580a96ae971e03c78205e Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Mon, 5 Mar 2012 08:05:56 +0100 Subject: [PATCH 05/11] FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option. --- doc/fate.texi | 4 +++- tests/Makefile | 2 +- tests/fate-run.sh | 5 +++-- tests/regression-funcs.sh | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/fate.texi b/doc/fate.texi index b4f520fef9..c1011e73f8 100644 --- a/doc/fate.texi +++ b/doc/fate.texi @@ -75,10 +75,12 @@ meaning only while running the regression tests. @item THREADS Specify how many threads to use while running regression tests, it is quite useful to detect thread-related regressions. +@item CPUFLAGS +Specify a mask to be applied to autodetected CPU flags. @end table @example - make V=1 SAMPLES=/var/fate/samples THREADS=2 fate + make V=1 SAMPLES=/var/fate/samples THREADS=2 CPUFLAGS=mmx fate @end example @chapter Automated Tests diff --git a/tests/Makefile b/tests/Makefile index 6fed995e0a..5c65237b44 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -116,7 +116,7 @@ fate: $(FATE) $(FATE): avconv$(EXESUF) $(FATE_UTILS:%=tests/%$(HOSTEXESUF)) @echo "TEST $(@:fate-%=%)" - $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' + $(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' fate-list: @printf '%s\n' $(sort $(FATE)) diff --git a/tests/fate-run.sh b/tests/fate-run.sh index 07cfe74ba7..a1503aecbd 100755 --- a/tests/fate-run.sh +++ b/tests/fate-run.sh @@ -17,6 +17,7 @@ ref=${7:-"${base}/ref/fate/${test}"} fuzz=$8 threads=${9:-1} thread_type=${10:-frame+slice} +cpuflags=${11:-all} outdir="tests/data/fate" outfile="${outdir}/${test}" @@ -50,7 +51,7 @@ run(){ } avconv(){ - run avconv -nostats -threads $threads -thread_type $thread_type "$@" + run avconv -nostats -threads $threads -thread_type $thread_type -cpuflags $cpuflags "$@" } framecrc(){ @@ -76,7 +77,7 @@ pcm(){ regtest(){ t="${test#$2-}" ref=${base}/ref/$2/$t - ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" + ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" "$cpuflags" } codectest(){ diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh index 3306f39941..3889129009 100755 --- a/tests/regression-funcs.sh +++ b/tests/regression-funcs.sh @@ -10,6 +10,7 @@ raw_src_dir=$3 target_exec=$4 target_path=$5 threads=${6:-1} +cpuflags=${8:-all} datadir="./tests/data" target_datadir="${target_path}/${datadir}" @@ -43,7 +44,7 @@ echov(){ . $(dirname $0)/md5.sh -AVCONV_OPTS="-nostats -y" +AVCONV_OPTS="-nostats -y -cpuflags $cpuflags" COMMON_OPTS="-flags +bitexact -idct simple -sws_flags +accurate_rnd+bitexact" DEC_OPTS="$COMMON_OPTS -threads $threads" ENC_OPTS="$COMMON_OPTS -threads 1 -dct fastint" From 2254b559cbcfc0418135f09add37c0a5866b1981 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 5 Mar 2012 12:26:42 -0800 Subject: [PATCH 06/11] swscale: make filterPos 32bit. Fixes overflows for large image sizes. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.org --- libswscale/ppc/swscale_altivec.c | 2 +- libswscale/swscale.c | 20 ++++++++++---------- libswscale/swscale_internal.h | 12 ++++++------ libswscale/utils.c | 4 ++-- libswscale/x86/scale.asm | 31 +++++++++++++++++-------------- libswscale/x86/swscale_mmx.c | 6 +++--- libswscale/x86/swscale_template.c | 4 ++-- 7 files changed, 41 insertions(+), 38 deletions(-) diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 3041053096..5537707bd0 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize, static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { register int i; DECLARE_ALIGNED(16, int, tempo)[4]; diff --git a/libswscale/swscale.c b/libswscale/swscale.c index bd909bf163..a98a4bb90f 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -61,7 +61,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride, static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { int i; int32_t *dst = (int32_t *) _dst; @@ -84,7 +84,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, const int16_t *filter, - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) { int i; const uint16_t *src = (const uint16_t *) _src; @@ -105,7 +105,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t // bilinear / bicubic scaling static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, + const int16_t *filter, const int32_t *filterPos, int filterSize) { int i; @@ -123,7 +123,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t * } static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, - const int16_t *filter, const int16_t *filterPos, + const int16_t *filter, const int32_t *filterPos, int filterSize) { int i; @@ -224,7 +224,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hLumFilter, - const int16_t *hLumFilterPos, int hLumFilterSize, + const int32_t *hLumFilterPos, int hLumFilterSize, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { @@ -268,7 +268,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src_in[4], int srcW, int xInc, const int16_t *hChrFilter, - const int16_t *hChrFilterPos, int hChrFilterSize, + const int32_t *hChrFilterPos, int hChrFilterSize, uint8_t *formatConvBuffer, uint32_t *pal) { const uint8_t *src1 = src_in[1], *src2 = src_in[2]; @@ -312,10 +312,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrXInc= c->chrXInc; const enum PixelFormat dstFormat= c->dstFormat; const int flags= c->flags; - int16_t *vLumFilterPos= c->vLumFilterPos; - int16_t *vChrFilterPos= c->vChrFilterPos; - int16_t *hLumFilterPos= c->hLumFilterPos; - int16_t *hChrFilterPos= c->hChrFilterPos; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; + int32_t *hLumFilterPos= c->hLumFilterPos; + int32_t *hChrFilterPos= c->hChrFilterPos; int16_t *vLumFilter= c->vLumFilter; int16_t *vChrFilter= c->vChrFilter; int16_t *hLumFilter= c->hLumFilter; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index bc36826ea2..29fe4ff2c6 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -295,10 +295,10 @@ typedef struct SwsContext { int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. - int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. - int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. - int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. - int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. + int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. + int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. + int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. + int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. int hChrFilterSize; ///< Horizontal filter size for chroma pixels. int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. @@ -508,10 +508,10 @@ typedef struct SwsContext { /** @{ */ void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize); + const int32_t *filterPos, int filterSize); void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, - const int16_t *filterPos, int filterSize); + const int32_t *filterPos, int filterSize); /** @} */ /// Color range conversion function for luma plane if needed. diff --git a/libswscale/utils.c b/libswscale/utils.c index 7c89a70953..51dd331117 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -180,7 +180,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist dist-1.0); } -static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, +static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal) { @@ -196,7 +196,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end - FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail); + FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail); if (FFABS(xInc - 0x10000) <10) { // unscaled int i; diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 0d367f7a14..2387d0266b 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -38,7 +38,7 @@ SECTION .text ; (SwsContext *c, int{16,32}_t *dst, ; int dstW, const uint{8,16}_t *src, ; const int16_t *filter, -; const int16_t *filterPos, int filterSize); +; const int32_t *filterPos, int filterSize); ; ; Scale one horizontal line. Input is either 8-bits width or 16-bits width ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to @@ -53,6 +53,9 @@ SECTION .text cglobal hscale%1to%2_%4_%5, %6, 7, %7 %if ARCH_X86_64 movsxd r2, r2d +%define mov32 movsxd +%else ; x86-32 +%define mov32 mov %endif ; x86-64 %if %2 == 19 %if mmsize == 8 ; mmx @@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %2 == 19 lea r1, [r1+r2*(4>>r2shr)] %endif ; %2 == 15/19 - lea r5, [r5+r2*(2>>r2shr)] + lea r5, [r5+r2*(4>>r2shr)] neg r2 .loop: %if %3 == 4 ; filterSize == 4 scaling ; load 2x4 or 4x4 source pixels into m0/m1 - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r6, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r6, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] %if mmsize == 8 movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] @@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %else ; %1 == 8 movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] %endif - movsx r0, word [r5+r2*2+4] ; filterPos[2] - movsx r6, word [r5+r2*2+6] ; filterPos[3] + mov32 r0, dword [r5+r2*4+8] ; filterPos[2] + mov32 r6, dword [r5+r2*4+12] ; filterPos[3] movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] %if %1 > 8 movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] @@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %endif ; mmx/sse2/ssse3/sse4 %else ; %3 == 8, i.e. filterSize == 8 scaling ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 - movsx r0, word [r5+r2*1+0] ; filterPos[0] - movsx r6, word [r5+r2*1+2] ; filterPos[1] + mov32 r0, dword [r5+r2*2+0] ; filterPos[0] + mov32 r6, dword [r5+r2*2+4] ; filterPos[1] movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] %if mmsize == 8 movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] @@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] %else ; mmsize == 16 movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] - movsx r0, word [r5+r2*1+4] ; filterPos[2] - movsx r6, word [r5+r2*1+6] ; filterPos[3] + mov32 r0, dword [r5+r2*2+8] ; filterPos[2] + mov32 r6, dword [r5+r2*2+12] ; filterPos[3] movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] %endif ; mmsize == 8/16 @@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 %define r1x r1 %define filter2 r6m %endif ; x86-32/64 - lea r5, [r5+r2*2] + lea r5, [r5+r2*4] %if %2 == 15 lea r1, [r1+r2*2] %else ; %2 == 19 @@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 neg r2 .loop: - movsx r0, word [r5+r2*2+0] ; filterPos[0] - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r0, dword [r5+r2*4+0] ; filterPos[0] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? pxor m4, m4 pxor m5, m5 @@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 jl .innerloop %ifidn %4, X4 - movsx r1x, word [r5+r2*2+2] ; filterPos[1] + mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] sub r1x, r6 ; and first 4 srcpx of dstpx[1] %if %1 > 8 diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index 64d5f0fc9d..99b32623cb 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -93,8 +93,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; - int16_t *vLumFilterPos= c->vLumFilterPos; - int16_t *vChrFilterPos= c->vChrFilterPos; + int32_t *vLumFilterPos= c->vLumFilterPos; + int32_t *vChrFilterPos= c->vChrFilterPos; int16_t *vLumFilter= c->vLumFilter; int16_t *vChrFilter= c->vChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -204,7 +204,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SwsContext *c, int16_t *data, \ int dstW, const uint8_t *src, \ const int16_t *filter, \ - const int16_t *filterPos, int filterSize) + const int32_t *filterPos, int filterSize) #define SCALE_FUNCS(filter_n, opt) \ SCALE_FUNC(filter_n, 8, 15, opt); \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 4db3fb3086..ad2b32f27b 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1376,7 +1376,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc) { - int16_t *filterPos = c->hLumFilterPos; + int32_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; void *mmx2FilterCode= c->lumMmx2FilterCode; int i; @@ -1472,7 +1472,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc) { - int16_t *filterPos = c->hChrFilterPos; + int32_t *filterPos = c->hChrFilterPos; int16_t *filter = c->hChrFilter; void *mmx2FilterCode= c->chrMmx2FilterCode; int i; From c23acbaed40101c677dfcfbbfe0d2c230a8e8f44 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 5 Mar 2012 16:01:19 -0800 Subject: [PATCH 07/11] Don't use ff_cropTbl[] for IDCT. Results of IDCT can by far outreach the range of ff_cropTbl[], leading to overreads and potentially crashes. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.org --- libavcodec/dsputil.c | 70 ++++++++++------------ libavcodec/h264idct_template.c | 32 +++++----- libavcodec/rv34dsp.c | 15 ++--- libavcodec/simple_idct.c | 19 +++--- libavcodec/simple_idct_template.c | 34 +++++------ libavcodec/svq3.c | 9 ++- libavcodec/vc1dsp.c | 97 +++++++++++++------------------ libavcodec/vp3dsp.c | 68 +++++++++++----------- libavcodec/vp8dsp.c | 18 +++--- 9 files changed, 161 insertions(+), 201 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index bd10054234..29c5976596 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -366,18 +366,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { - pixels[0] = cm[block[0]]; - pixels[1] = cm[block[1]]; - pixels[2] = cm[block[2]]; - pixels[3] = cm[block[3]]; - pixels[4] = cm[block[4]]; - pixels[5] = cm[block[5]]; - pixels[6] = cm[block[6]]; - pixels[7] = cm[block[7]]; + pixels[0] = av_clip_uint8(block[0]); + pixels[1] = av_clip_uint8(block[1]); + pixels[2] = av_clip_uint8(block[2]); + pixels[3] = av_clip_uint8(block[3]); + pixels[4] = av_clip_uint8(block[4]); + pixels[5] = av_clip_uint8(block[5]); + pixels[6] = av_clip_uint8(block[6]); + pixels[7] = av_clip_uint8(block[7]); pixels += line_size; block += 8; @@ -388,14 +387,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<4;i++) { - pixels[0] = cm[block[0]]; - pixels[1] = cm[block[1]]; - pixels[2] = cm[block[2]]; - pixels[3] = cm[block[3]]; + pixels[0] = av_clip_uint8(block[0]); + pixels[1] = av_clip_uint8(block[1]); + pixels[2] = av_clip_uint8(block[2]); + pixels[3] = av_clip_uint8(block[3]); pixels += line_size; block += 8; @@ -406,12 +404,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<2;i++) { - pixels[0] = cm[block[0]]; - pixels[1] = cm[block[1]]; + pixels[0] = av_clip_uint8(block[0]); + pixels[1] = av_clip_uint8(block[1]); pixels += line_size; block += 8; @@ -443,18 +440,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<8;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; - pixels[2] = cm[pixels[2] + block[2]]; - pixels[3] = cm[pixels[3] + block[3]]; - pixels[4] = cm[pixels[4] + block[4]]; - pixels[5] = cm[pixels[5] + block[5]]; - pixels[6] = cm[pixels[6] + block[6]]; - pixels[7] = cm[pixels[7] + block[7]]; + pixels[0] = av_clip_uint8(pixels[0] + block[0]); + pixels[1] = av_clip_uint8(pixels[1] + block[1]); + pixels[2] = av_clip_uint8(pixels[2] + block[2]); + pixels[3] = av_clip_uint8(pixels[3] + block[3]); + pixels[4] = av_clip_uint8(pixels[4] + block[4]); + pixels[5] = av_clip_uint8(pixels[5] + block[5]); + pixels[6] = av_clip_uint8(pixels[6] + block[6]); + pixels[7] = av_clip_uint8(pixels[7] + block[7]); pixels += line_size; block += 8; } @@ -464,14 +460,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<4;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; - pixels[2] = cm[pixels[2] + block[2]]; - pixels[3] = cm[pixels[3] + block[3]]; + pixels[0] = av_clip_uint8(pixels[0] + block[0]); + pixels[1] = av_clip_uint8(pixels[1] + block[1]); + pixels[2] = av_clip_uint8(pixels[2] + block[2]); + pixels[3] = av_clip_uint8(pixels[3] + block[3]); pixels += line_size; block += 8; } @@ -481,12 +476,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels int line_size) { int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* read the pixels */ for(i=0;i<2;i++) { - pixels[0] = cm[pixels[0] + block[0]]; - pixels[1] = cm[pixels[1] + block[1]]; + pixels[0] = av_clip_uint8(pixels[0] + block[0]); + pixels[1] = av_clip_uint8(pixels[1] + block[1]); pixels += line_size; block += 8; } @@ -2733,15 +2727,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) { - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - dest[0] = cm[(block[0] + 4)>>3]; + dest[0] = av_clip_uint8((block[0] + 4)>>3); } static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) { - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - - dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; + dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3)); } static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c index eba850ac6f..e476f89a6f 100644 --- a/libavcodec/h264idct_template.c +++ b/libavcodec/h264idct_template.c @@ -49,7 +49,6 @@ static const uint8_t scan8[16*3]={ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride) { int i; - INIT_CLIP pixel *dst = (pixel*)_dst; dctcoef *block = (dctcoef*)_block; stride /= sizeof(pixel); @@ -74,16 +73,15 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride) const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); - dst[i + 0*stride]= CLIP(dst[i + 0*stride] + ((z0 + z3) >> 6)); - dst[i + 1*stride]= CLIP(dst[i + 1*stride] + ((z1 + z2) >> 6)); - dst[i + 2*stride]= CLIP(dst[i + 2*stride] + ((z1 - z2) >> 6)); - dst[i + 3*stride]= CLIP(dst[i + 3*stride] + ((z0 - z3) >> 6)); + dst[i + 0*stride]= av_clip_pixel(dst[i + 0*stride] + ((z0 + z3) >> 6)); + dst[i + 1*stride]= av_clip_pixel(dst[i + 1*stride] + ((z1 + z2) >> 6)); + dst[i + 2*stride]= av_clip_pixel(dst[i + 2*stride] + ((z1 - z2) >> 6)); + dst[i + 3*stride]= av_clip_pixel(dst[i + 3*stride] + ((z0 - z3) >> 6)); } } void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ int i; - INIT_CLIP pixel *dst = (pixel*)_dst; dctcoef *block = (dctcoef*)_block; stride /= sizeof(pixel); @@ -143,14 +141,14 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ const int b5 = (a3>>2) - a5; const int b7 = a7 - (a1>>2); - dst[i + 0*stride] = CLIP( dst[i + 0*stride] + ((b0 + b7) >> 6) ); - dst[i + 1*stride] = CLIP( dst[i + 1*stride] + ((b2 + b5) >> 6) ); - dst[i + 2*stride] = CLIP( dst[i + 2*stride] + ((b4 + b3) >> 6) ); - dst[i + 3*stride] = CLIP( dst[i + 3*stride] + ((b6 + b1) >> 6) ); - dst[i + 4*stride] = CLIP( dst[i + 4*stride] + ((b6 - b1) >> 6) ); - dst[i + 5*stride] = CLIP( dst[i + 5*stride] + ((b4 - b3) >> 6) ); - dst[i + 6*stride] = CLIP( dst[i + 6*stride] + ((b2 - b5) >> 6) ); - dst[i + 7*stride] = CLIP( dst[i + 7*stride] + ((b0 - b7) >> 6) ); + dst[i + 0*stride] = av_clip_pixel( dst[i + 0*stride] + ((b0 + b7) >> 6) ); + dst[i + 1*stride] = av_clip_pixel( dst[i + 1*stride] + ((b2 + b5) >> 6) ); + dst[i + 2*stride] = av_clip_pixel( dst[i + 2*stride] + ((b4 + b3) >> 6) ); + dst[i + 3*stride] = av_clip_pixel( dst[i + 3*stride] + ((b6 + b1) >> 6) ); + dst[i + 4*stride] = av_clip_pixel( dst[i + 4*stride] + ((b6 - b1) >> 6) ); + dst[i + 5*stride] = av_clip_pixel( dst[i + 5*stride] + ((b4 - b3) >> 6) ); + dst[i + 6*stride] = av_clip_pixel( dst[i + 6*stride] + ((b2 - b5) >> 6) ); + dst[i + 7*stride] = av_clip_pixel( dst[i + 7*stride] + ((b0 - b7) >> 6) ); } } @@ -158,13 +156,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ int i, j; int dc = (((dctcoef*)block)[0] + 32) >> 6; - INIT_CLIP pixel *dst = (pixel*)_dst; stride /= sizeof(pixel); for( j = 0; j < 4; j++ ) { for( i = 0; i < 4; i++ ) - dst[i] = CLIP( dst[i] + dc ); + dst[i] = av_clip_pixel( dst[i] + dc ); dst += stride; } } @@ -172,13 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){ int i, j; int dc = (((dctcoef*)block)[0] + 32) >> 6; - INIT_CLIP pixel *dst = (pixel*)_dst; stride /= sizeof(pixel); for( j = 0; j < 8; j++ ) { for( i = 0; i < 8; i++ ) - dst[i] = CLIP( dst[i] + dc ); + dst[i] = av_clip_pixel( dst[i] + dc ); dst += stride; } } diff --git a/libavcodec/rv34dsp.c b/libavcodec/rv34dsp.c index ce5be93894..4145c4dd85 100644 --- a/libavcodec/rv34dsp.c +++ b/libavcodec/rv34dsp.c @@ -55,7 +55,6 @@ static av_always_inline void rv34_row_transform(int temp[16], DCTELEM *block) */ static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){ int temp[16]; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; rv34_row_transform(temp, block); @@ -67,10 +66,10 @@ static void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, DCTELEM *block){ const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i]; const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i]; - dst[0] = cm[ dst[0] + ( (z0 + z3) >> 10 ) ]; - dst[1] = cm[ dst[1] + ( (z1 + z2) >> 10 ) ]; - dst[2] = cm[ dst[2] + ( (z1 - z2) >> 10 ) ]; - dst[3] = cm[ dst[3] + ( (z0 - z3) >> 10 ) ]; + dst[0] = av_clip_uint8( dst[0] + ( (z0 + z3) >> 10 ) ); + dst[1] = av_clip_uint8( dst[1] + ( (z1 + z2) >> 10 ) ); + dst[2] = av_clip_uint8( dst[2] + ( (z1 - z2) >> 10 ) ); + dst[3] = av_clip_uint8( dst[3] + ( (z0 - z3) >> 10 ) ); dst += stride; } @@ -103,15 +102,13 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){ static void rv34_idct_dc_add_c(uint8_t *dst, ptrdiff_t stride, int dc) { - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i, j; - cm += (13*13*dc + 0x200) >> 10; - + dc = (13*13*dc + 0x200) >> 10; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) - dst[j] = cm[ dst[j] ]; + dst[j] = av_clip_uint8( dst[j] + dc ); dst += stride; } diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index 0c75261079..5812a87705 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -53,7 +53,6 @@ static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col) { int c0, c1, c2, c3, a0, a1, a2, a3; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; a0 = col[8*0]; a1 = col[8*2]; @@ -63,13 +62,13 @@ static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); c1 = a1 * C1 + a3 * C2; c3 = a1 * C2 - a3 * C1; - dest[0] = cm[(c0 + c1) >> C_SHIFT]; + dest[0] = av_clip_uint8((c0 + c1) >> C_SHIFT); dest += line_size; - dest[0] = cm[(c2 + c3) >> C_SHIFT]; + dest[0] = av_clip_uint8((c2 + c3) >> C_SHIFT); dest += line_size; - dest[0] = cm[(c2 - c3) >> C_SHIFT]; + dest[0] = av_clip_uint8((c2 - c3) >> C_SHIFT); dest += line_size; - dest[0] = cm[(c0 - c1) >> C_SHIFT]; + dest[0] = av_clip_uint8((c0 - c1) >> C_SHIFT); } #define BF(k) \ @@ -133,7 +132,6 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) { int c0, c1, c2, c3, a0, a1, a2, a3; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; a0 = col[8*0]; a1 = col[8*1]; @@ -143,13 +141,13 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); c1 = a1 * C1 + a3 * C2; c3 = a1 * C2 - a3 * C1; - dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; + dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); dest += line_size; - dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; + dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); dest += line_size; - dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; + dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); dest += line_size; - dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; + dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); } #define RN_SHIFT 15 @@ -161,7 +159,6 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col static inline void idct4row(DCTELEM *row) { int c0, c1, c2, c3, a0, a1, a2, a3; - //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; a0 = row[0]; a1 = row[1]; diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index fdec3aab2b..3c855e3825 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -224,50 +224,48 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, DCTELEM *col) { int a0, a1, a2, a3, b0, b1, b2, b3; - INIT_CLIP; IDCT_COLS; - dest[0] = CLIP((a0 + b0) >> COL_SHIFT); + dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a1 + b1) >> COL_SHIFT); + dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a2 + b2) >> COL_SHIFT); + dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a3 + b3) >> COL_SHIFT); + dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a3 - b3) >> COL_SHIFT); + dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a2 - b2) >> COL_SHIFT); + dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a1 - b1) >> COL_SHIFT); + dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); dest += line_size; - dest[0] = CLIP((a0 - b0) >> COL_SHIFT); + dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); } static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, DCTELEM *col) { int a0, a1, a2, a3, b0, b1, b2, b3; - INIT_CLIP; IDCT_COLS; - dest[0] = CLIP(dest[0] + ((a0 + b0) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a1 + b1) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a2 + b2) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a3 + b3) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a3 - b3) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a2 - b2) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a1 - b1) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT)); dest += line_size; - dest[0] = CLIP(dest[0] + ((a0 - b0) >> COL_SHIFT)); + dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); } static inline void FUNC(idctSparseCol)(DCTELEM *col) diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index e157061f64..caafc49969 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -173,7 +173,6 @@ void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, { const int qmul = svq3_dequant_coeff[qp]; int i; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; if (dc) { dc = 13*13*((dc == 1) ? 1538*block[0] : ((qmul*(block[0] >> 3)) / 2)); @@ -199,10 +198,10 @@ void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, const int z3 = 17* block[i + 4*1] + 7*block[i + 4*3]; const int rr = (dc + 0x80000); - dst[i + stride*0] = cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ]; - dst[i + stride*1] = cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ]; - dst[i + stride*2] = cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ]; - dst[i + stride*3] = cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ]; + dst[i + stride*0] = av_clip_uint8( dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ); + dst[i + stride*1] = av_clip_uint8( dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ); + dst[i + stride*2] = av_clip_uint8( dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ); + dst[i + stride*3] = av_clip_uint8( dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ); } } diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index 9bd107cdd9..b40824b86a 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -139,8 +139,6 @@ static void vc1_h_s_overlap_c(DCTELEM *left, DCTELEM *right) * @see 8.6 */ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; - int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3; int a0_sign = a0 >> 31; /* Store sign */ a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */ @@ -163,8 +161,8 @@ static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){ else{ d = FFMIN(d, clip); d = (d ^ d_sign) - d_sign; /* Restore sign */ - src[-1*stride] = cm[src[-1*stride] - d]; - src[ 0*stride] = cm[src[ 0*stride] + d]; + src[-1*stride] = av_clip_uint8(src[-1*stride] - d); + src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d); } return 1; } @@ -234,19 +232,17 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) { int i; int dc = block[0]; - const uint8_t *cm; dc = (3 * dc + 1) >> 1; dc = (3 * dc + 16) >> 5; - cm = ff_cropTbl + MAX_NEG_CROP + dc; for(i = 0; i < 8; i++){ - dest[0] = cm[dest[0]]; - dest[1] = cm[dest[1]]; - dest[2] = cm[dest[2]]; - dest[3] = cm[dest[3]]; - dest[4] = cm[dest[4]]; - dest[5] = cm[dest[5]]; - dest[6] = cm[dest[6]]; - dest[7] = cm[dest[7]]; + dest[0] = av_clip_uint8(dest[0] + dc); + dest[1] = av_clip_uint8(dest[1] + dc); + dest[2] = av_clip_uint8(dest[2] + dc); + dest[3] = av_clip_uint8(dest[3] + dc); + dest[4] = av_clip_uint8(dest[4] + dc); + dest[5] = av_clip_uint8(dest[5] + dc); + dest[6] = av_clip_uint8(dest[6] + dc); + dest[7] = av_clip_uint8(dest[7] + dc); dest += linesize; } } @@ -326,19 +322,17 @@ static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) { int i; int dc = block[0]; - const uint8_t *cm; dc = ( 3 * dc + 1) >> 1; dc = (17 * dc + 64) >> 7; - cm = ff_cropTbl + MAX_NEG_CROP + dc; for(i = 0; i < 4; i++){ - dest[0] = cm[dest[0]]; - dest[1] = cm[dest[1]]; - dest[2] = cm[dest[2]]; - dest[3] = cm[dest[3]]; - dest[4] = cm[dest[4]]; - dest[5] = cm[dest[5]]; - dest[6] = cm[dest[6]]; - dest[7] = cm[dest[7]]; + dest[0] = av_clip_uint8(dest[0] + dc); + dest[1] = av_clip_uint8(dest[1] + dc); + dest[2] = av_clip_uint8(dest[2] + dc); + dest[3] = av_clip_uint8(dest[3] + dc); + dest[4] = av_clip_uint8(dest[4] + dc); + dest[5] = av_clip_uint8(dest[5] + dc); + dest[6] = av_clip_uint8(dest[6] + dc); + dest[7] = av_clip_uint8(dest[7] + dc); dest += linesize; } } @@ -348,7 +342,6 @@ static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) int i; register int t1,t2,t3,t4,t5,t6,t7,t8; DCTELEM *src, *dst; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; src = block; dst = block; @@ -388,10 +381,10 @@ static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) t3 = 22 * src[ 8] + 10 * src[24]; t4 = 22 * src[24] - 10 * src[ 8]; - dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; - dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; - dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; - dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; + dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7)); + dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7)); + dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7)); + dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7)); src ++; dest++; @@ -404,15 +397,13 @@ static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block) { int i; int dc = block[0]; - const uint8_t *cm; dc = (17 * dc + 4) >> 3; dc = (12 * dc + 64) >> 7; - cm = ff_cropTbl + MAX_NEG_CROP + dc; for(i = 0; i < 8; i++){ - dest[0] = cm[dest[0]]; - dest[1] = cm[dest[1]]; - dest[2] = cm[dest[2]]; - dest[3] = cm[dest[3]]; + dest[0] = av_clip_uint8(dest[0] + dc); + dest[1] = av_clip_uint8(dest[1] + dc); + dest[2] = av_clip_uint8(dest[2] + dc); + dest[3] = av_clip_uint8(dest[3] + dc); dest += linesize; } } @@ -422,7 +413,6 @@ static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) int i; register int t1,t2,t3,t4,t5,t6,t7,t8; DCTELEM *src, *dst; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; src = block; dst = block; @@ -458,14 +448,14 @@ static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; - dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)]; - dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)]; - dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)]; - dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)]; - dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)]; - dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)]; - dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)]; - dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)]; + dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t5 + t1) >> 7)); + dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t6 + t2) >> 7)); + dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t7 + t3) >> 7)); + dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t8 + t4) >> 7)); + dest[4*linesize] = av_clip_uint8(dest[4*linesize] + ((t8 - t4 + 1) >> 7)); + dest[5*linesize] = av_clip_uint8(dest[5*linesize] + ((t7 - t3 + 1) >> 7)); + dest[6*linesize] = av_clip_uint8(dest[6*linesize] + ((t6 - t2 + 1) >> 7)); + dest[7*linesize] = av_clip_uint8(dest[7*linesize] + ((t5 - t1 + 1) >> 7)); src ++; dest++; @@ -478,15 +468,13 @@ static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block) { int i; int dc = block[0]; - const uint8_t *cm; dc = (17 * dc + 4) >> 3; dc = (17 * dc + 64) >> 7; - cm = ff_cropTbl + MAX_NEG_CROP + dc; for(i = 0; i < 4; i++){ - dest[0] = cm[dest[0]]; - dest[1] = cm[dest[1]]; - dest[2] = cm[dest[2]]; - dest[3] = cm[dest[3]]; + dest[0] = av_clip_uint8(dest[0] + dc); + dest[1] = av_clip_uint8(dest[1] + dc); + dest[2] = av_clip_uint8(dest[2] + dc); + dest[3] = av_clip_uint8(dest[3] + dc); dest += linesize; } } @@ -496,7 +484,6 @@ static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) int i; register int t1,t2,t3,t4; DCTELEM *src, *dst; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; src = block; dst = block; @@ -522,10 +509,10 @@ static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) t3 = 22 * src[ 8] + 10 * src[24]; t4 = 22 * src[24] - 10 * src[ 8]; - dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)]; - dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)]; - dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)]; - dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)]; + dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7)); + dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7)); + dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7)); + dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7)); src ++; dest++; diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index baa22a5519..438ae76b57 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -41,7 +41,6 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) { int16_t *ip = input; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; int Ed, Gd, Add, Bdd, Fd, Hd; @@ -147,29 +146,29 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int ip[5*8] = (Fd + Bdd ) >> 4; ip[6*8] = (Fd - Bdd ) >> 4; }else if(type==1){ - dst[0*stride] = cm[(Gd + Cd ) >> 4]; - dst[7*stride] = cm[(Gd - Cd ) >> 4]; + dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4); + dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4); - dst[1*stride] = cm[(Add + Hd ) >> 4]; - dst[2*stride] = cm[(Add - Hd ) >> 4]; + dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4); + dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4); - dst[3*stride] = cm[(Ed + Dd ) >> 4]; - dst[4*stride] = cm[(Ed - Dd ) >> 4]; + dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4); + dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4); - dst[5*stride] = cm[(Fd + Bdd ) >> 4]; - dst[6*stride] = cm[(Fd - Bdd ) >> 4]; + dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4); + dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4); }else{ - dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd ) >> 4)]; - dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd ) >> 4)]; + dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4)); + dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4)); - dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)]; - dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)]; + dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4)); + dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4)); - dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd ) >> 4)]; - dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd ) >> 4)]; + dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4)); + dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4)); - dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)]; - dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)]; + dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4)); + dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4)); } } else { @@ -190,18 +189,18 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int dst[4*stride]= dst[5*stride]= dst[6*stride]= - dst[7*stride]= cm[128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)]; + dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)); }else{ if(ip[0*8]){ int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); - dst[0*stride] = cm[dst[0*stride] + v]; - dst[1*stride] = cm[dst[1*stride] + v]; - dst[2*stride] = cm[dst[2*stride] + v]; - dst[3*stride] = cm[dst[3*stride] + v]; - dst[4*stride] = cm[dst[4*stride] + v]; - dst[5*stride] = cm[dst[5*stride] + v]; - dst[6*stride] = cm[dst[6*stride] + v]; - dst[7*stride] = cm[dst[7*stride] + v]; + dst[0*stride] = av_clip_uint8(dst[0*stride] + v); + dst[1*stride] = av_clip_uint8(dst[1*stride] + v); + dst[2*stride] = av_clip_uint8(dst[2*stride] + v); + dst[3*stride] = av_clip_uint8(dst[3*stride] + v); + dst[4*stride] = av_clip_uint8(dst[4*stride] + v); + dst[5*stride] = av_clip_uint8(dst[5*stride] + v); + dst[6*stride] = av_clip_uint8(dst[6*stride] + v); + dst[7*stride] = av_clip_uint8(dst[7*stride] + v); } } } @@ -225,17 +224,16 @@ void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/* void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){ int i, dc = (block[0] + 15) >> 5; - const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; for(i = 0; i < 8; i++){ - dest[0] = cm[dest[0]]; - dest[1] = cm[dest[1]]; - dest[2] = cm[dest[2]]; - dest[3] = cm[dest[3]]; - dest[4] = cm[dest[4]]; - dest[5] = cm[dest[5]]; - dest[6] = cm[dest[6]]; - dest[7] = cm[dest[7]]; + dest[0] = av_clip_uint8(dest[0] + dc); + dest[1] = av_clip_uint8(dest[1] + dc); + dest[2] = av_clip_uint8(dest[2] + dc); + dest[3] = av_clip_uint8(dest[3] + dc); + dest[4] = av_clip_uint8(dest[4] + dc); + dest[5] = av_clip_uint8(dest[5] + dc); + dest[6] = av_clip_uint8(dest[6] + dc); + dest[7] = av_clip_uint8(dest[7] + dc); dest += line_size; } } diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c index 86dc42ed37..f6c944328a 100644 --- a/libavcodec/vp8dsp.c +++ b/libavcodec/vp8dsp.c @@ -80,7 +80,6 @@ static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16]) static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) { int i, t0, t1, t2, t3; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; DCTELEM tmp[16]; for (i = 0; i < 4; i++) { @@ -105,10 +104,10 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); - dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; - dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; - dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; - dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; + dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); + dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); + dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); + dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); dst += stride; } } @@ -116,14 +115,13 @@ static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], ptrdiff_t stride) { int i, dc = (block[0] + 4) >> 3; - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; block[0] = 0; for (i = 0; i < 4; i++) { - dst[0] = cm[dst[0]]; - dst[1] = cm[dst[1]]; - dst[2] = cm[dst[2]]; - dst[3] = cm[dst[3]]; + dst[0] = av_clip_uint8(dst[0] + dc); + dst[1] = av_clip_uint8(dst[1] + dc); + dst[2] = av_clip_uint8(dst[2] + dc); + dst[3] = av_clip_uint8(dst[3] + dc); dst += stride; } } From 11b940a1a8e7e5d5b212935a3ce78aeda577f5f2 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 5 Mar 2012 17:03:32 -0800 Subject: [PATCH 08/11] svq3: protect against negative quantizers. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.org --- libavcodec/svq3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index caafc49969..99c1dec07c 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -650,7 +650,7 @@ static int svq3_decode_mb(SVQ3Context *svq3, unsigned int mb_type) if (IS_INTRA16x16(mb_type) || (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) { s->qscale += svq3_get_se_golomb(&s->gb); - if (s->qscale > 31){ + if (s->qscale > 31u){ av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale); return -1; } From 6193ff68549ecbaf1a4d63a0e06964ec580ac620 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 6 Mar 2012 10:27:05 -0800 Subject: [PATCH 09/11] error_resilience: initialize s->block_index[]. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.org --- libavcodec/error_resilience.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c index 0fab8be1fd..d14adb4437 100644 --- a/libavcodec/error_resilience.c +++ b/libavcodec/error_resilience.c @@ -419,9 +419,14 @@ static void guess_mv(MpegEncContext *s) if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || num_avail <= mb_width / 2) { for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + s->mb_x = 0; + s->mb_y = mb_y; + ff_init_block_index(s); for (mb_x = 0; mb_x < s->mb_width; mb_x++) { const int mb_xy = mb_x + mb_y * s->mb_stride; + ff_update_block_index(s); + if (IS_INTRA(s->current_picture.f.mb_type[mb_xy])) continue; if (!(s->error_status_table[mb_xy] & ER_MV_ERROR)) @@ -456,6 +461,9 @@ static void guess_mv(MpegEncContext *s) changed = 0; for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + s->mb_x = 0; + s->mb_y = mb_y; + ff_init_block_index(s); for (mb_x = 0; mb_x < s->mb_width; mb_x++) { const int mb_xy = mb_x + mb_y * s->mb_stride; int mv_predictor[8][2] = { { 0 } }; @@ -467,6 +475,8 @@ static void guess_mv(MpegEncContext *s) const int mot_index = (mb_x + mb_y * mot_stride) * mot_step; int prev_x, prev_y, prev_ref; + ff_update_block_index(s); + if ((mb_x ^ mb_y ^ pass) & 1) continue; @@ -1072,11 +1082,16 @@ void ff_er_frame_end(MpegEncContext *s) /* handle inter blocks with damaged AC */ for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + s->mb_x = 0; + s->mb_y = mb_y; + ff_init_block_index(s); for (mb_x = 0; mb_x < s->mb_width; mb_x++) { const int mb_xy = mb_x + mb_y * s->mb_stride; const int mb_type = s->current_picture.f.mb_type[mb_xy]; int dir = !s->last_picture.f.data[0]; + ff_update_block_index(s); + error = s->error_status_table[mb_xy]; if (IS_INTRA(mb_type)) @@ -1114,11 +1129,16 @@ void ff_er_frame_end(MpegEncContext *s) /* guess MVs */ if (s->pict_type == AV_PICTURE_TYPE_B) { for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + s->mb_x = 0; + s->mb_y = mb_y; + ff_init_block_index(s); for (mb_x = 0; mb_x < s->mb_width; mb_x++) { int xy = mb_x * 2 + mb_y * 2 * s->b8_stride; const int mb_xy = mb_x + mb_y * s->mb_stride; const int mb_type = s->current_picture.f.mb_type[mb_xy]; + ff_update_block_index(s); + error = s->error_status_table[mb_xy]; if (IS_INTRA(mb_type)) From a9c5b6f6020c3fad6bf59985518e08453e002cdf Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 6 Mar 2012 12:47:23 -0800 Subject: [PATCH 10/11] cpu: initialize mask to -1, so that by default, optimizations are used. --- libavutil/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index e72f7231a8..c44075be29 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -19,7 +19,7 @@ #include "cpu.h" #include "config.h" -static int cpuflags_mask, checked; +static int cpuflags_mask = -1, checked; int av_get_cpu_flags(void) { From b5161908e06b4497bf663510fb495ba97a6fd2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= Date: Tue, 6 Mar 2012 22:11:30 +0100 Subject: [PATCH 11/11] SBR DSP: fix SSE code to not use SSE2 instructions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit movq from SSE register _to_ memory is an SSE2 instruction. Use the SSE movlps function instead that does the same thing. Signed-off-by: Reimar Döffinger Signed-off-by: Ronald S. Bultje --- libavcodec/x86/sbrdsp.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm index c165c52ca4..c3b559bb15 100644 --- a/libavcodec/x86/sbrdsp.asm +++ b/libavcodec/x86/sbrdsp.asm @@ -104,7 +104,7 @@ cglobal sbr_hf_g_filt, 5, 6, 5 movq m2, [r1] punpckldq m0, m0 mulps m2, m0 - movq [r0], m2 + movlps [r0], m2 add r0, 8 add r2, 4 add r1, STEP