lavc/exrdsp: unroll predictor

With explicit unrolling, we can skip half of the sign bit flips, and the
compiler is then better able to optimise the scalar loop:

predictor_c: 31376.0 (before)
predictor_c: 23703.0 (after)
2023-11-11 17:08:45 +02:00 · 2023-11-11 17:08:45 +02:00 · ce467421dc
parent c536e92207
commit ce467421dc
1 changed file with 13 additions and 3 deletions
--- a/libavcodec/exrdsp.c
+++ b/libavcodec/exrdsp.c
@@ -40,10 +40,20 @@ static void reorder_pixels_scalar(uint8_t *dst, const uint8_t *src, ptrdiff_t si

 static void predictor_scalar(uint8_t *src, ptrdiff_t size)
 {
-    ptrdiff_t i;
+    /* Unrolled `src[i + 1] += src[i] - 128;`; on uint8_t, `^ 0x80` equals `- 128`. */
+    if (size > 0 && (size & 1) == 0) { /* even, non-empty: peel one step; never touch an empty buffer */
+        src[1] += src[0] ^ 0x80;
+        src++;
+        size--;
+    }

-    for (i = 1; i < size; i++)
-        src[i] += src[i-1] - 128;
+    for (ptrdiff_t i = 1; i < size; i += 2) {
+        uint8_t a = src[i] + src[i - 1];
+        /* a is the new src[i] before its sign flip; that flip and the next `- 128` cancel */
+        src[i] = a;
+        src[i + 1] += a;
+        src[i] ^= 0x80;
+    }
 }

 av_cold void ff_exrdsp_init(ExrDSPContext *c)