From 2fd14dd8eb66dc5dd14254d0b758fb80d44b3140 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Mon, 12 Oct 2015 19:37:45 +0200 Subject: [PATCH] avcodec/simple_idct10: improve precision omse goes from 0.03060703 (which fails for dct-test) to 0.01663750. This also actually improves the error of decoding the sample generated by fate-vsynth3-dnxhd1080i-10bit using simple_idct10 to FAANI, which goes (when resampled to yuv422p) from: stddev: 0.06 PSNR: 72.28 MAXDIFF: 1 to identical. Signed-off-by: Michael Niedermayer --- libavcodec/simple_idct.c | 9 +++- libavcodec/simple_idct_template.c | 43 +++++++++++++------ tests/ref/fate/dnxhr-444 | 2 +- tests/ref/vsynth/vsynth1-dnxhd-720p-10bit | 2 +- tests/ref/vsynth/vsynth2-dnxhd-720p-10bit | 2 +- tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit | 2 +- 6 files changed, 41 insertions(+), 19 deletions(-) diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index eeb627999c..4d6d20df5a 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -36,6 +36,11 @@ #define BIT_DEPTH 10 #include "simple_idct_template.c" + +#define EXTRA_SHIFT 2 +#include "simple_idct_template.c" + +#undef EXTRA_SHIFT #undef BIT_DEPTH #define BIT_DEPTH 12 @@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat) block[i] *= qmat[i]; for (i = 0; i < 8; i++) - idctRowCondDC_10(block + i*8, 2); + idctRowCondDC_extrashift_10(block + i*8, 2); for (i = 0; i < 8; i++) { block[i] += 8192; - idctSparseCol_10(block + i); + idctSparseCol_extrashift_10(block + i); } } diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index 789db8d0ac..0585679b6d 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -66,19 +66,26 @@ #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 -#if BIT_DEPTH == 10 -#define W1 (22725*4) // 90901 -#define W2 (21407*4) // 85627 -#define W3 (19265*4) // 77062 -#define W4 (16384*4) // 65535 -#define W5 (12873*4) // 51491 -#define W6 ( 8867*4) // 
35468 -#define W7 ( 4520*4) // 18081 +# if BIT_DEPTH == 10 +#define W1 22725 // 90901 +#define W2 21407 // 85627 +#define W3 19265 // 77062 +#define W4 16384 // 65535 +#define W5 12873 // 51491 +#define W6 8867 // 35468 +#define W7 4520 // 18081 -#define ROW_SHIFT 15 -#define COL_SHIFT 20 -#define DC_SHIFT 1 -#else +# ifdef EXTRA_SHIFT +#define ROW_SHIFT 13 +#define COL_SHIFT 18 +#define DC_SHIFT 1 +# else +#define ROW_SHIFT 12 +#define COL_SHIFT 19 +#define DC_SHIFT 2 +# endif + +# else #define W1 45451 #define W2 42813 #define W3 38531 @@ -90,7 +97,7 @@ #define ROW_SHIFT 16 #define COL_SHIFT 17 #define DC_SHIFT -1 -#endif +# endif #define MUL(a, b) ((a) * (b)) #define MAC(a, b, c) ((a) += (b) * (c)) @@ -101,7 +108,11 @@ #endif +#ifdef EXTRA_SHIFT +static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) +#else static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) +#endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) } \ } while (0) +#ifdef EXTRA_SHIFT +static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) +#else static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, int16_t *col) { @@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, } static inline void FUNC(idctSparseCol)(int16_t *col) +#endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col) col[56] = ((a0 - b0) >> COL_SHIFT); } +#ifndef EXTRA_SHIFT void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) { pixel *dest = (pixel *)dest_; @@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block) for (i = 0; i < 8; i++) FUNC(idctSparseCol)(block + i); } +#endif diff --git a/tests/ref/fate/dnxhr-444 b/tests/ref/fate/dnxhr-444 index 743067d95a..f9e73c313e 100644 --- a/tests/ref/fate/dnxhr-444 +++ b/tests/ref/fate/dnxhr-444 @@ -1,2 +1,2 @@ #tb 0: 1/24 
-0, 0, 0, 1, 9665280, 0x238a023e +0, 0, 0, 1, 9665280, 0x19ef4057 diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit index ab588070e0..dc808f3836 100644 --- a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit +++ b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit @@ -1,4 +1,4 @@ f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd -3cc84f9e8d2e704475b410de27dd9951 *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo +87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo stddev: 6.23 PSNR: 32.23 MAXDIFF: 64 bytes: 7603200/ 760320 diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit index 5c21985bdd..0d2068d40d 100644 --- a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit +++ b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit @@ -1,4 +1,4 @@ e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd -a98c4b69d4d036089a455e147d6922a7 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo +1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo stddev: 1.54 PSNR: 44.36 MAXDIFF: 31 bytes: 7603200/ 760320 diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit index 1dcadd80cf..b9c9e03579 100644 --- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit +++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit @@ -1,4 +1,4 @@ e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd -2b497215c57558910a605ff8c78430d9 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo +0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo stddev: 1.33 PSNR: 45.61 MAXDIFF: 22 bytes: 7603200/ 760320