diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S index 213b40b3e7..6fec33cf6a 100644 --- a/libavcodec/aarch64/h264pred_neon.S +++ b/libavcodec/aarch64/h264pred_neon.S @@ -81,8 +81,8 @@ function ff_pred16x16_dc_neon, export=1 .L_pred16x16_dc_end: mov w3, #8 6: st1 {v0.16b}, [x0], x1 - st1 {v0.16b}, [x0], x1 subs w3, w3, #1 + st1 {v0.16b}, [x0], x1 b.ne 6b ret endfunc @@ -91,8 +91,8 @@ function ff_pred16x16_hor_neon, export=1 sub x2, x0, #1 mov w3, #16 1: ld1r {v0.16b}, [x2], x1 - st1 {v0.16b}, [x0], x1 subs w3, w3, #1 + st1 {v0.16b}, [x0], x1 b.ne 1b ret endfunc @@ -102,9 +102,9 @@ function ff_pred16x16_vert_neon, export=1 add x1, x1, x1 ld1 {v0.16b}, [x2], x1 mov w3, #8 -1: st1 {v0.16b}, [x0], x1 +1: subs w3, w3, #1 + st1 {v0.16b}, [x0], x1 st1 {v0.16b}, [x2], x1 - subs w3, w3, #1 b.ne 1b ret endfunc @@ -158,8 +158,8 @@ function ff_pred16x16_plane_neon, export=1 add v1.8h, v1.8h, v2.8h sqshrun2 v0.16b, v1.8h, #5 add v1.8h, v1.8h, v3.8h - st1 {v0.16b}, [x0], x1 subs w3, w3, #1 + st1 {v0.16b}, [x0], x1 b.ne 1b ret endfunc @@ -175,8 +175,8 @@ function ff_pred8x8_hor_neon, export=1 sub x2, x0, #1 mov w3, #8 1: ld1r {v0.8b}, [x2], x1 - st1 {v0.8b}, [x0], x1 subs w3, w3, #1 + st1 {v0.8b}, [x0], x1 b.ne 1b ret endfunc @@ -186,9 +186,9 @@ function ff_pred8x8_vert_neon, export=1 lsl x1, x1, #1 ld1 {v0.8b}, [x2], x1 mov w3, #4 -1: st1 {v0.8b}, [x0], x1 +1: subs w3, w3, #1 + st1 {v0.8b}, [x0], x1 st1 {v0.8b}, [x2], x1 - subs w3, w3, #1 b.ne 1b ret endfunc @@ -232,9 +232,9 @@ function ff_pred8x8_plane_neon, export=1 mov w3, #8 1: sqshrun v0.8b, v1.8h, #5 + subs w3, w3, #1 add v1.8h, v1.8h, v2.8h st1 {v0.8b}, [x0], x1 - subs w3, w3, #1 b.ne 1b ret endfunc @@ -290,9 +290,9 @@ function ff_pred8x8_dc_neon, export=1 .L_pred8x8_dc_end: mov w3, #4 add x2, x0, x1, lsl #2 -6: st1 {v0.8b}, [x0], x1 +6: subs w3, w3, #1 + st1 {v0.8b}, [x0], x1 st1 {v1.8b}, [x2], x1 - subs w3, w3, #1 b.ne 6b ret endfunc