From b1fdf81c6eed786742c08037a9aa662ef7967ab2 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 22 Jun 2015 02:22:36 +0200 Subject: [PATCH] avcodec/jpeg2000dwt: use 32x32->64 multiplies in the 9/7i DWT This significantly improves the quality when the integer 9/7 transform is used Signed-off-by: Michael Niedermayer --- libavcodec/jpeg2000dwt.c | 33 ++++++++++++++----- tests/ref/fate/j2k-dwt | 40 ++++++++++++------------ tests/ref/fate/jpeg2000-dcinema | 4 +-- tests/ref/vsynth/vsynth1-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth2-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth3-jpeg2000-97 | 8 ++--- tests/ref/vsynth/vsynth_lena-jpeg2000-97 | 8 ++--- 7 files changed, 63 insertions(+), 46 deletions(-) diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c index a1fe713202..dff2516d11 100644 --- a/libavcodec/jpeg2000dwt.c +++ b/libavcodec/jpeg2000dwt.c @@ -40,12 +40,13 @@ /* Lifting parameters in integer format. * Computed as param = (float param) * (1 << 16) */ -#define I_LFTG_ALPHA 103949 -#define I_LFTG_BETA 3472 -#define I_LFTG_GAMMA 57862 -#define I_LFTG_DELTA 29066 -#define I_LFTG_K 80621 -#define I_LFTG_X 53274 +#define I_LFTG_ALPHA 103949ll +#define I_LFTG_BETA 3472ll +#define I_LFTG_GAMMA 57862ll +#define I_LFTG_DELTA 29066ll +#define I_LFTG_K 80621ll +#define I_LFTG_X 53274ll +#define I_PRESHIFT 8 static inline void extend53(int *p, int i0, int i1) { @@ -246,11 +247,16 @@ static void sd_1d97_int(int *p, int i0, int i1) static void dwt_encode97_int(DWTContext *s, int *t) { - int lev, - w = s->linelen[s->ndeclevels-1][0]; + int lev; + int w = s->linelen[s->ndeclevels-1][0]; + int h = s->linelen[s->ndeclevels-1][1]; + int i; int *line = s->i_linebuf; line += 5; + for (i = 0; i < w * h; i++) + t[i] <<= I_PRESHIFT; + for (lev = s->ndeclevels-1; lev >= 0; lev--){ int lh = s->linelen[lev][0], lv = s->linelen[lev][1], @@ -294,6 +300,9 @@ static void dwt_encode97_int(DWTContext *s, int *t) } } + + for (i = 0; i < w * h; i++) + t[i] = (t[i] + 
((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT; } static void sr_1d53(int *p, int i0, int i1) @@ -471,11 +480,16 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) { int lev; int w = s->linelen[s->ndeclevels - 1][0]; + int h = s->linelen[s->ndeclevels - 1][1]; + int i; int32_t *line = s->i_linebuf; int32_t *data = t; /* position at index O of line range [0-5,w+5] cf. extend function */ line += 5; + for (i = 0; i < w * h; i++) + data[i] <<= I_PRESHIFT; + for (lev = 0; lev < s->ndeclevels; lev++) { int lh = s->linelen[lev][0], lv = s->linelen[lev][1], @@ -515,6 +529,9 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t) data[w * i + lp] = l[i]; } } + + for (i = 0; i < w * h; i++) + data[i] = (data[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT; } int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2], diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt index 5ede1ce2f5..c130fafa6d 100644 --- a/tests/ref/fate/j2k-dwt +++ b/tests/ref/fate/j2k-dwt @@ -1,60 +1,60 @@ 5/3i, decomp:15 border 151 170 140 183 milli-err2: 0 -9/7i, decomp:15 border 151 170 140 183 milli-err2: 5188 +9/7i, decomp:15 border 151 170 140 183 milli-err2: 544 9/7f, decomp:15 border 151 170 140 183 err2: 0.0001 5/3i, decomp:21 border 173 201 81 189 milli-err2: 0 -9/7i, decomp:21 border 173 201 81 189 milli-err2: 6478 +9/7i, decomp:21 border 173 201 81 189 milli-err2: 592 9/7f, decomp:21 border 173 201 81 189 err2: 0.0001 5/3i, decomp:22 border 213 227 76 245 milli-err2: 0 -9/7i, decomp:22 border 213 227 76 245 milli-err2: 6539 +9/7i, decomp:22 border 213 227 76 245 milli-err2: 533 9/7f, decomp:22 border 213 227 76 245 err2: 0.0001 5/3i, decomp:13 border 134 157 184 203 milli-err2: 0 -9/7i, decomp:13 border 134 157 184 203 milli-err2: 19203 +9/7i, decomp:13 border 134 157 184 203 milli-err2: 535 9/7f, decomp:13 border 134 157 184 203 err2: 0.0001 5/3i, decomp: 1 border 204 237 6 106 milli-err2: 0 -9/7i, decomp: 1 border 204 237 6 106 milli-err2: 924 +9/7i, decomp: 1 border 204 237 6 106 milli-err2: 219 9/7f, decomp: 
1 border 204 237 6 106 err2: 0.0000 5/3i, decomp:28 border 76 211 13 210 milli-err2: 0 -9/7i, decomp:28 border 76 211 13 210 milli-err2: 17297 +9/7i, decomp:28 border 76 211 13 210 milli-err2: 791 9/7f, decomp:28 border 76 211 13 210 err2: 0.0002 5/3i, decomp:21 border 76 99 43 123 milli-err2: 0 -9/7i, decomp:21 border 76 99 43 123 milli-err2: 9039 +9/7i, decomp:21 border 76 99 43 123 milli-err2: 686 9/7f, decomp:21 border 76 99 43 123 err2: 0.0001 5/3i, decomp:15 border 192 243 174 204 milli-err2: 0 -9/7i, decomp:15 border 192 243 174 204 milli-err2: 7693 +9/7i, decomp:15 border 192 243 174 204 milli-err2: 476 9/7f, decomp:15 border 192 243 174 204 err2: 0.0001 5/3i, decomp:21 border 17 68 93 204 milli-err2: 0 -9/7i, decomp:21 border 17 68 93 204 milli-err2: 7810 +9/7i, decomp:21 border 17 68 93 204 milli-err2: 633 9/7f, decomp:21 border 17 68 93 204 err2: 0.0001 5/3i, decomp:11 border 142 168 82 174 milli-err2: 0 -9/7i, decomp:11 border 142 168 82 174 milli-err2: 18168 +9/7i, decomp:11 border 142 168 82 174 milli-err2: 696 9/7f, decomp:11 border 142 168 82 174 err2: 0.0001 5/3i, decomp:23 border 142 209 171 235 milli-err2: 0 -9/7i, decomp:23 border 142 209 171 235 milli-err2: 7313 +9/7i, decomp:23 border 142 209 171 235 milli-err2: 626 9/7f, decomp:23 border 142 209 171 235 err2: 0.0001 5/3i, decomp:30 border 37 185 79 245 milli-err2: 0 -9/7i, decomp:30 border 37 185 79 245 milli-err2: 13498 +9/7i, decomp:30 border 37 185 79 245 milli-err2: 953 9/7f, decomp:30 border 37 185 79 245 err2: 0.0002 5/3i, decomp: 5 border 129 236 30 243 milli-err2: 0 -9/7i, decomp: 5 border 129 236 30 243 milli-err2: 8775 +9/7i, decomp: 5 border 129 236 30 243 milli-err2: 620 9/7f, decomp: 5 border 129 236 30 243 err2: 0.0001 5/3i, decomp:10 border 5 160 146 247 milli-err2: 0 -9/7i, decomp:10 border 5 160 146 247 milli-err2: 13478 +9/7i, decomp:10 border 5 160 146 247 milli-err2: 797 9/7f, decomp:10 border 5 160 146 247 err2: 0.0002 5/3i, decomp: 5 border 104 162 6 47 milli-err2: 0 
-9/7i, decomp: 5 border 104 162 6 47 milli-err2: 7808 +9/7i, decomp: 5 border 104 162 6 47 milli-err2: 603 9/7f, decomp: 5 border 104 162 6 47 err2: 0.0001 5/3i, decomp:24 border 78 250 102 218 milli-err2: 0 -9/7i, decomp:24 border 78 250 102 218 milli-err2: 12570 +9/7i, decomp:24 border 78 250 102 218 milli-err2: 836 9/7f, decomp:24 border 78 250 102 218 err2: 0.0002 5/3i, decomp:28 border 86 98 56 79 milli-err2: 0 -9/7i, decomp:28 border 86 98 56 79 milli-err2: 4148 +9/7i, decomp:28 border 86 98 56 79 milli-err2: 597 9/7f, decomp:28 border 86 98 56 79 err2: 0.0001 5/3i, decomp: 6 border 95 238 197 214 milli-err2: 0 -9/7i, decomp: 6 border 95 238 197 214 milli-err2: 7686 +9/7i, decomp: 6 border 95 238 197 214 milli-err2: 478 9/7f, decomp: 6 border 95 238 197 214 err2: 0.0001 5/3i, decomp:17 border 77 169 93 165 milli-err2: 0 -9/7i, decomp:17 border 77 169 93 165 milli-err2: 12026 +9/7i, decomp:17 border 77 169 93 165 milli-err2: 616 9/7f, decomp:17 border 77 169 93 165 err2: 0.0001 5/3i, decomp:22 border 178 187 7 119 milli-err2: 0 -9/7i, decomp:22 border 178 187 7 119 milli-err2: 4971 +9/7i, decomp:22 border 178 187 7 119 milli-err2: 392 9/7f, decomp:22 border 178 187 7 119 err2: 0.0000 diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema index 8040cb1d46..c7bf52aa2b 100644 --- a/tests/ref/fate/jpeg2000-dcinema +++ b/tests/ref/fate/jpeg2000-dcinema @@ -1,3 +1,3 @@ #tb 0: 1/24 -0, 0, 0, 1, 12441600, 0xbf142791 -0, 1, 1, 1, 12441600, 0x6b7a2ab5 +0, 0, 0, 1, 12441600, 0xda6b6cde +0, 1, 1, 1, 12441600, 0xb0994664 diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97 index b0fc029bfa..78dc6add29 100644 --- a/tests/ref/vsynth/vsynth1-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth1-jpeg2000-97 @@ -1,4 +1,4 @@ -4c7dbe2451f56a49c29b0b5d7808d74d *tests/data/fate/vsynth1-jpeg2000-97.avi -3661616 tests/data/fate/vsynth1-jpeg2000-97.avi -d079e946a2fb75ad5ce6cb2760d1cc62 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo 
-stddev: 4.63 PSNR: 34.81 MAXDIFF: 54 bytes: 7603200/ 7603200 +a19cc0e1a1c1bf76ff5a0b63a0bdfbd1 *tests/data/fate/vsynth1-jpeg2000-97.avi +3654420 tests/data/fate/vsynth1-jpeg2000-97.avi +3b71c0f8aebf45122da77d892a6ebf00 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo +stddev: 4.23 PSNR: 35.59 MAXDIFF: 53 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97 index a0c24d8ab1..7ba2d9f0ca 100644 --- a/tests/ref/vsynth/vsynth2-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth2-jpeg2000-97 @@ -1,4 +1,4 @@ -c3582d23a1fca31a6218346b82167f88 *tests/data/fate/vsynth2-jpeg2000-97.avi -2451092 tests/data/fate/vsynth2-jpeg2000-97.avi -d1329b49bcfcf74279eb07f7e20ddcec *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo -stddev: 3.73 PSNR: 36.69 MAXDIFF: 30 bytes: 7603200/ 7603200 +b86217f0bcbd84a9368ad3f98af32157 *tests/data/fate/vsynth2-jpeg2000-97.avi +2448506 tests/data/fate/vsynth2-jpeg2000-97.avi +4d9d9db91075a1eca2a6b9f152e4defc *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo +stddev: 3.23 PSNR: 37.94 MAXDIFF: 29 bytes: 7603200/ 7603200 diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97 index 1d8d148a78..caf8d9d5e9 100644 --- a/tests/ref/vsynth/vsynth3-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth3-jpeg2000-97 @@ -1,4 +1,4 @@ -2f8a9b514fbf1cb034076459463a7b76 *tests/data/fate/vsynth3-jpeg2000-97.avi -83866 tests/data/fate/vsynth3-jpeg2000-97.avi -febc7ef2ae9ec3f34b74d456922ae858 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo -stddev: 4.87 PSNR: 34.37 MAXDIFF: 51 bytes: 86700/ 86700 +5e17fdaae1a22f3eef8c82b512e4b1b9 *tests/data/fate/vsynth3-jpeg2000-97.avi +83670 tests/data/fate/vsynth3-jpeg2000-97.avi +8ec04513b2e6645c9ea340e3fe9fe8f2 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo +stddev: 4.52 PSNR: 35.02 MAXDIFF: 47 bytes: 86700/ 86700 diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97 index 089479b04f..ee5ad31cd5 100644 --- 
a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 +++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97 @@ -1,4 +1,4 @@ -60808e880f1fd410b010feeca9105f4e *tests/data/fate/vsynth_lena-jpeg2000-97.avi -1931500 tests/data/fate/vsynth_lena-jpeg2000-97.avi -6d775a823d4b96cc6c121665bc7eb359 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo -stddev: 3.38 PSNR: 37.53 MAXDIFF: 28 bytes: 7603200/ 7603200 +ca78db12e1af7cbf44fdce165aaa5130 *tests/data/fate/vsynth_lena-jpeg2000-97.avi +1918756 tests/data/fate/vsynth_lena-jpeg2000-97.avi +5fd8a2e35503b48af302b3ef5e317683 *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo +stddev: 2.84 PSNR: 39.04 MAXDIFF: 28 bytes: 7603200/ 7603200