mirror of https://git.ffmpeg.org/ffmpeg.git
lavc/exrdsp: unroll predictor
With explicit unrolling, we can skip half of the sign bit flips, and the compiler is then better able to optimise the scalar loop. Benchmark results:
predictor_c: 31376.0 (before)
predictor_c: 23703.0 (after)
This commit is contained in:
parent
c536e92207
commit
ce467421dc
|
@@ -40,10 +40,20 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si
|
||||||
|
|
||||||
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
|
static void predictor_scalar(uint8_t *src, ptrdiff_t size)
|
||||||
{
|
{
|
||||||
ptrdiff_t i;
|
/* Unrolled: `src[i + 1] += src[i] - 128;` */
|
||||||
|
if ((size & 1) == 0) {
|
||||||
|
src[1] += src[0] ^ 0x80;
|
||||||
|
src++;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 1; i < size; i++)
|
for (ptrdiff_t i = 1; i < size; i += 2) {
|
||||||
src[i] += src[i-1] - 128;
|
uint8_t a = src[i] + src[i - 1];
|
||||||
|
|
||||||
|
src[i] = a;
|
||||||
|
src[i + 1] += a;
|
||||||
|
src[i] ^= 0x80;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
av_cold void ff_exrdsp_init(ExrDSPContext *c)
|
av_cold void ff_exrdsp_init(ExrDSPContext *c)
|
||||||
|
|
Loading…
Reference in New Issue