mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-25 08:42:39 +00:00
Change rounding of the horizontal DWT to match the vertical one.
This allows some simplifications and optimizations and should not have any effect on quality.
Originally committed as revision 10172 to svn://svn.ffmpeg.org/ffmpeg/trunk.
This commit is contained in:
parent
7506d47aa3
commit
ce611a27be
@ -111,8 +111,7 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
|
||||
|
||||
i = 0;
|
||||
asm volatile(
|
||||
"pcmpeqd %%xmm7, %%xmm7 \n\t"
|
||||
"psrad $29, %%xmm7 \n\t"
|
||||
"pslld $1, %%xmm7 \n\t"
|
||||
::);
|
||||
for(; i<w_l-7; i+=8){
|
||||
asm volatile(
|
||||
@ -157,25 +156,21 @@ void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
|
||||
"movdqu 20(%1), %%xmm6 \n\t"
|
||||
"paddd (%1), %%xmm2 \n\t"
|
||||
"paddd 16(%1), %%xmm6 \n\t"
|
||||
"movdqa %%xmm2, %%xmm0 \n\t"
|
||||
"movdqa %%xmm6, %%xmm4 \n\t"
|
||||
"pslld $2, %%xmm2 \n\t"
|
||||
"pslld $2, %%xmm6 \n\t"
|
||||
"psubd %%xmm2, %%xmm0 \n\t"
|
||||
"psubd %%xmm6, %%xmm4 \n\t"
|
||||
"psrad $1, %%xmm0 \n\t"
|
||||
"psrad $1, %%xmm4 \n\t"
|
||||
"movdqu (%0), %%xmm2 \n\t"
|
||||
"movdqu 16(%0), %%xmm6 \n\t"
|
||||
"psubd %%xmm0, %%xmm2 \n\t"
|
||||
"psubd %%xmm4, %%xmm6 \n\t"
|
||||
"movdqu (%0), %%xmm0 \n\t"
|
||||
"movdqu 16(%0), %%xmm4 \n\t"
|
||||
"paddd %%xmm2, %%xmm0 \n\t"
|
||||
"paddd %%xmm6, %%xmm4 \n\t"
|
||||
"psrad $1, %%xmm2 \n\t"
|
||||
"psrad $1, %%xmm6 \n\t"
|
||||
"paddd %%xmm0, %%xmm2 \n\t"
|
||||
"paddd %%xmm4, %%xmm6 \n\t"
|
||||
"movdqa %%xmm2, (%2) \n\t"
|
||||
"movdqa %%xmm6, 16(%2) \n\t"
|
||||
:: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
|
||||
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
|
||||
}
|
||||
|
||||
{
|
||||
@ -291,10 +286,9 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
|
||||
DWTELEM * const ref = b+w2 - 1;
|
||||
|
||||
i = 1;
|
||||
b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
|
||||
b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
|
||||
asm volatile(
|
||||
"pcmpeqd %%mm7, %%mm7 \n\t"
|
||||
"psrld $29, %%mm7 \n\t"
|
||||
"pslld $1, %%mm7 \n\t"
|
||||
::);
|
||||
for(; i<w_l-3; i+=4){
|
||||
asm volatile(
|
||||
@ -333,16 +327,12 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
|
||||
"movq 12(%1), %%mm6 \n\t"
|
||||
"paddd (%1), %%mm2 \n\t"
|
||||
"paddd 8(%1), %%mm6 \n\t"
|
||||
"pxor %%mm0, %%mm0 \n\t" //note: the 2 xor could be avoided if we would flip the rounding direction
|
||||
"pxor %%mm4, %%mm4 \n\t"
|
||||
"psubd %%mm2, %%mm0 \n\t"
|
||||
"psubd %%mm6, %%mm4 \n\t"
|
||||
"psrad $1, %%mm0 \n\t"
|
||||
"psrad $1, %%mm4 \n\t"
|
||||
"psubd %%mm0, %%mm2 \n\t"
|
||||
"psubd %%mm4, %%mm6 \n\t"
|
||||
"movq (%0), %%mm0 \n\t"
|
||||
"movq 8(%0), %%mm4 \n\t"
|
||||
"paddd %%mm2, %%mm0 \n\t"
|
||||
"paddd %%mm6, %%mm4 \n\t"
|
||||
"psrad $1, %%mm2 \n\t"
|
||||
"psrad $1, %%mm6 \n\t"
|
||||
"paddd %%mm0, %%mm2 \n\t"
|
||||
"paddd %%mm4, %%mm6 \n\t"
|
||||
"movq %%mm2, (%2) \n\t"
|
||||
@ -351,7 +341,7 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO, W_AS);
|
||||
snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -775,7 +775,7 @@ static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int
|
||||
int i;
|
||||
|
||||
assert(shift == 4);
|
||||
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
|
||||
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*4*(src) + 4*(ref) + add + 5 + (5<<27))/(5*16) - (1<<23)))
|
||||
if(mirror_left){
|
||||
dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
|
||||
dst += dst_step;
|
||||
@ -1113,8 +1113,8 @@ static void horizontal_decompose97i(DWTELEM *b, int width){
|
||||
DWTELEM temp[width];
|
||||
const int w2= (width+1)>>1;
|
||||
|
||||
lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
|
||||
liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
|
||||
lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
|
||||
liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
|
||||
lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
|
||||
lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
|
||||
}
|
||||
@ -1150,7 +1150,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
|
||||
#ifdef liftS
|
||||
b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
|
||||
#else
|
||||
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
|
||||
b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1344,8 +1344,8 @@ void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
|
||||
|
||||
lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
|
||||
lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
|
||||
liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO-1, W_BS, 0, 1);
|
||||
lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
|
||||
liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
|
||||
lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
|
||||
}
|
||||
|
||||
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
||||
|
@ -165,11 +165,11 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE
|
||||
|
||||
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
|
||||
for(; i<w; i++){
|
||||
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO-1 + 4 * src[i]) >> W_BS);
|
||||
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
|
||||
}
|
||||
|
||||
if(width&1){
|
||||
dst[w] = src[w] + ((2 * ref[w] + W_BO-1 + 4 * src[w]) >> W_BS);
|
||||
dst[w] = src[w] + ((2 * ref[w] + W_BO + 4 * src[w]) >> W_BS);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -141,9 +141,9 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
|
||||
2654678 ./tests/data/a-ffv1.avi
|
||||
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
|
||||
stddev: 0.00 PSNR:99.99 bytes:7602176
|
||||
9078723c943de5d79490f54b99e6ea9e *./tests/data/a-snow.avi
|
||||
156656 ./tests/data/a-snow.avi
|
||||
f2932084b52e2ede167c9ba21eae0656 *./tests/data/out.yuv
|
||||
958d649d09b7361d5f00b5b3fcccbcd2 *./tests/data/a-snow.avi
|
||||
156606 ./tests/data/a-snow.avi
|
||||
b19cb7f9134f922326028c6bb44e96de *./tests/data/out.yuv
|
||||
stddev: 23.14 PSNR:20.83 bytes:7602176
|
||||
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
|
||||
3519486 ./tests/data/a-snow53.avi
|
||||
|
@ -141,9 +141,9 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
|
||||
3525804 ./tests/data/a-ffv1.avi
|
||||
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
|
||||
stddev: 0.00 PSNR:99.99 bytes:7602176
|
||||
40a6e938ac2bd92ee12cd57925e86454 *./tests/data/a-snow.avi
|
||||
68758 ./tests/data/a-snow.avi
|
||||
1e356854142898c7c4aab4bfedadf235 *./tests/data/out.yuv
|
||||
2cfa1bdb443d04a890208a83fd239461 *./tests/data/a-snow.avi
|
||||
68872 ./tests/data/a-snow.avi
|
||||
64a0495b7ab53509d3b791465262795c *./tests/data/out.yuv
|
||||
stddev: 10.86 PSNR:27.40 bytes:7602176
|
||||
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
|
||||
2721980 ./tests/data/a-snow53.avi
|
||||
|
@ -2046,51 +2046,51 @@ ret: 0 st:-1 ts:-0.645825 flags:1
|
||||
ret: 0 st: 0 dts:0.040000 pts:0.040000 pos:9610 size:1075 flags:0
|
||||
----------------
|
||||
tests/data/a-snow.avi
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st:-1 ts:-1.000000 flags:0
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st:-1 ts:1.894167 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
|
||||
ret: 0 st: 0 ts:0.800000 flags:0
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
|
||||
ret:-1 st: 0 ts:-0.320000 flags:1
|
||||
ret:-1 st:-1 ts:2.576668 flags:0
|
||||
ret: 0 st:-1 ts:1.470835 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
|
||||
ret: 0 st: 0 ts:0.360000 flags:0
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
|
||||
ret:-1 st: 0 ts:-0.760000 flags:1
|
||||
ret:-1 st:-1 ts:2.153336 flags:0
|
||||
ret: 0 st:-1 ts:1.047503 flags:1
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
|
||||
ret: 0 st: 0 ts:-0.040000 flags:0
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st: 0 ts:2.840000 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
|
||||
ret: 0 st:-1 ts:1.730004 flags:0
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
|
||||
ret: 0 st:-1 ts:0.624171 flags:1
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
|
||||
ret: 0 st: 0 ts:-0.480000 flags:0
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st: 0 ts:2.400000 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
|
||||
ret: 0 st:-1 ts:1.306672 flags:0
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
|
||||
ret: 0 st:-1 ts:0.200839 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st: 0 ts:-0.920000 flags:0
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2986 flags:1
|
||||
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
|
||||
ret: 0 st: 0 ts:2.000000 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63240 size:3635 flags:1
|
||||
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63350 size:3635 flags:1
|
||||
ret: 0 st:-1 ts:0.883340 flags:0
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31726 size:3478 flags:1
|
||||
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31690 size:3478 flags:1
|
||||
ret:-1 st:-1 ts:-0.222493 flags:1
|
||||
ret:-1 st: 0 ts:2.680000 flags:0
|
||||
ret: 0 st: 0 ts:1.560000 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46794 size:3663 flags:1
|
||||
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46908 size:3663 flags:1
|
||||
ret: 0 st:-1 ts:0.460008 flags:0
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:18006 size:3229 flags:1
|
||||
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17990 size:3229 flags:1
|
||||
ret:-1 st:-1 ts:-0.645825 flags:1
|
||||
----------------
|
||||
tests/data/a-snow53.avi
|
||||
|
Loading…
Reference in New Issue
Block a user