h264: avoid undefined behavior in chroma motion compensation

Makes fate-h264 pass under valgrind --undef-value-errors=yes with
-cpuflags none. The B + C == 0 case now has its own branch that reads
only the src samples scaled by A. {avg,put}_h264_chroma_mc8_8 is
approximately 5% faster and {avg,put}_h264_chroma_mc4_8 is 2% faster,
on both x86 and ARM.
This commit is contained in:
Janne Grunau 2014-02-14 17:00:06 +01:00
parent 4bcca3611d
commit 982b596ea6
1 changed file with 32 additions and 3 deletions

View File

@ -43,7 +43,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
}else{\
} else if (B + C) {\
const int E= B+C;\
const int step= C ? stride : 1;\
for(i=0; i<h; i++){\
@ -52,6 +52,13 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
} else {\
for ( i = 0; i < h; i++){\
OP(dst[0], A * src[0]);\
OP(dst[1], A * src[1]);\
dst += stride;\
src += stride;\
}\
}\
}\
\
@ -76,7 +83,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
}else{\
} else if (B + C) {\
const int E= B+C;\
const int step= C ? stride : 1;\
for(i=0; i<h; i++){\
@ -87,6 +94,15 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
} else {\
for ( i = 0; i < h; i++){\
OP(dst[0], A * src[0]);\
OP(dst[1], A * src[1]);\
OP(dst[2], A * src[2]);\
OP(dst[3], A * src[3]);\
dst += stride;\
src += stride;\
}\
}\
}\
\
@ -115,7 +131,7 @@ static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
}else{\
} else if (B + C) {\
const int E= B+C;\
const int step= C ? stride : 1;\
for(i=0; i<h; i++){\
@ -130,6 +146,19 @@ static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *
dst+= stride;\
src+= stride;\
}\
} else {\
for ( i = 0; i < h; i++){\
OP(dst[0], A * src[0]);\
OP(dst[1], A * src[1]);\
OP(dst[2], A * src[2]);\
OP(dst[3], A * src[3]);\
OP(dst[4], A * src[4]);\
OP(dst[5], A * src[5]);\
OP(dst[6], A * src[6]);\
OP(dst[7], A * src[7]);\
dst += stride;\
src += stride;\
}\
}\
}