arm: Consistently use proper interworking function returns

Use "bx lr", or "pop {pc}", which do proper mode switching between thumb and arm modes. A plain "mov pc, lr" does not switch from thumb mode to arm mode (while in arm mode, it does switch mode for a thumb caller). This is normally not an issue, as CONFIG_THUMB is only enabled if the C compiler defaults to thumb; but stick to patterns that can do mode switching if needed, for consistency. Signed-off-by: Martin Storsjö <martin@martin.st>
2024-10-04 00:30:24 +03:00 · 2024-10-04 00:30:24 +03:00 · 77e6293735
parent ec9985b54f
commit 77e6293735
4 changed files with 7 additions and 10 deletions
--- a/libswresample/arm/resample.S
+++ b/libswresample/arm/resample.S
@@ -30,7 +30,7 @@ function ff_resample_common_apply_filter_x4_float_neon, export=1
    vpadd.f32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
    vpadd.f32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 function ff_resample_common_apply_filter_x8_float_neon, export=1
@@ -46,7 +46,7 @@ function ff_resample_common_apply_filter_x8_float_neon, export=1
    vpadd.f32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
    vpadd.f32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 function ff_resample_common_apply_filter_x4_s16_neon, export=1
@@ -59,7 +59,7 @@ function ff_resample_common_apply_filter_x4_s16_neon, export=1
    vpadd.s32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
    vpadd.s32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
 function ff_resample_common_apply_filter_x8_s16_neon, export=1
@@ -73,5 +73,5 @@ function ff_resample_common_apply_filter_x8_s16_neon, export=1
    vpadd.s32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
    vpadd.s32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
-    mov pc, lr
+    bx                  lr
 endfunc
--- a/libswscale/arm/hscale.S
+++ b/libswscale/arm/hscale.S
@@ -65,6 +65,5 @@ function ff_hscale_8_to_15_neon, export=1
    subs                r2, #2                                         @ dstW -= 2
    bgt                 1b                                             @ loop until end of line
    vpop                {q4-q7}
-    pop                 {r4-r12, lr}
+    pop                 {r4-r12, pc}
-    mov pc, lr
 endfunc
--- a/libswscale/arm/output.S
+++ b/libswscale/arm/output.S
@@ -73,6 +73,5 @@ function ff_yuv2planeX_8_neon, export=1
    subs                r4, r4, #8                                     @ dstW -= 8
    bgt                 2b                                             @ loop until width is consumed
    vpop                {q4-q7}
-    pop                 {r4-r12, lr}
+    pop                 {r4-r12, pc}
-    mov                 pc, lr
 endfunc
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -262,8 +262,7 @@ function ff_\ifmt\()_to_\ofmt\()_neon, export=1
    increment_and_test_\ifmt
    bgt                 1b
    vpop                {q4-q7}
-    pop                 {r4-r12, lr}
+    pop                 {r4-r12, pc}
-    mov                 pc, lr
 endfunc
 .endm