Patch notes (free text ahead of the first "diff --git" line):

  simple_idct_neon.S: separate macro parameters and macro-call arguments
  with commas.

  * Every ".macro" parameter list and every macro invocation touched here
    switches from space-separated to comma-separated arguments.
  * The vector arrangement is now passed with its leading dot (".4H" /
    ".8H"), so macro bodies paste it directly ("\y3\l", "v28\l") instead
    of spelling the dot themselves ("\y3\().\l", "v28.\l").
  * The change is meant to be syntax-only: each added instruction keeps
    the operands of the line it replaces.  In particular the first smull
    of idct_col4_top still multiplies by z2, exactly as the removed line
    did.

  NOTE(review): column alignment inside context lines was reconstructed
  from a whitespace-collapsed copy of this patch; if it does not apply
  cleanly, retry with whitespace-insensitive matching and confirm against
  the target file.

diff --git a/libavcodec/aarch64/simple_idct_neon.S b/libavcodec/aarch64/simple_idct_neon.S
index 52273420f9..92987985d2 100644
--- a/libavcodec/aarch64/simple_idct_neon.S
+++ b/libavcodec/aarch64/simple_idct_neon.S
@@ -61,37 +61,37 @@ endconst
         br              x10
 .endm
 
-.macro smull1 a b c
+.macro smull1 a, b, c
         smull           \a, \b, \c
 .endm
 
-.macro smlal1 a b c
+.macro smlal1 a, b, c
         smlal           \a, \b, \c
 .endm
 
-.macro smlsl1 a b c
+.macro smlsl1 a, b, c
         smlsl           \a, \b, \c
 .endm
 
-.macro idct_col4_top y1 y2 y3 y4 i l
-        smull\i         v7.4S,  \y3\().\l, z2
-        smull\i         v16.4S, \y3\().\l, z6
-        smull\i         v17.4S, \y2\().\l, z1
+.macro idct_col4_top y1, y2, y3, y4, i, l
+        smull\i         v7.4S,  \y3\l, z2
+        smull\i         v16.4S, \y3\l, z6
+        smull\i         v17.4S, \y2\l, z1
         add             v19.4S, v23.4S, v7.4S
-        smull\i         v18.4S, \y2\().\l, z3
+        smull\i         v18.4S, \y2\l, z3
         add             v20.4S, v23.4S, v16.4S
-        smull\i         v5.4S,  \y2\().\l, z5
+        smull\i         v5.4S,  \y2\l, z5
         sub             v21.4S, v23.4S, v16.4S
-        smull\i         v6.4S,  \y2\().\l, z7
+        smull\i         v6.4S,  \y2\l, z7
         sub             v22.4S, v23.4S, v7.4S
 
-        smlal\i         v17.4S, \y4\().\l, z3
-        smlsl\i         v18.4S, \y4\().\l, z7
-        smlsl\i         v5.4S,  \y4\().\l, z1
-        smlsl\i         v6.4S,  \y4\().\l, z5
+        smlal\i         v17.4S, \y4\l, z3
+        smlsl\i         v18.4S, \y4\l, z7
+        smlsl\i         v5.4S,  \y4\l, z1
+        smlsl\i         v6.4S,  \y4\l, z5
 .endm
 
-.macro idct_row4_neon y1 y2 y3 y4 pass
+.macro idct_row4_neon y1, y2, y3, y4, pass
         ld1             {\y1\().2D-\y2\().2D}, [x2], #32
         movi            v23.4S, #1<<2, lsl #8
         orr             v5.16B, \y1\().16B, \y2\().16B
@@ -101,7 +101,7 @@ endconst
         mov             x3, v5.D[1]
         smlal           v23.4S, \y1\().4H, z4
 
-        idct_col4_top   \y1 \y2 \y3 \y4 1 4H
+        idct_col4_top   \y1, \y2, \y3, \y4, 1, .4H
 
         cmp             x3, #0
         beq             \pass\()f
@@ -153,7 +153,7 @@ endconst
         trn2            \y4\().4S, v17.4S, v19.4S
 .endm
 
-.macro declare_idct_col4_neon i l
+.macro declare_idct_col4_neon i, l
 function idct_col4_neon\i
         dup             v23.4H, z4c
 .if \i == 1
@@ -164,14 +164,14 @@ function idct_col4_neon\i
 .endif
         smull           v23.4S, v23.4H, z4
 
-        idct_col4_top   v24 v25 v26 v27 \i \l
+        idct_col4_top   v24, v25, v26, v27, \i, \l
 
         mov             x4, v28.D[\i - 1]
         mov             x5, v29.D[\i - 1]
         cmp             x4, #0
         beq             1f
 
-        smull\i         v7.4S,  v28.\l, z4
+        smull\i         v7.4S,  v28\l, z4
         add             v19.4S, v19.4S, v7.4S
         sub             v20.4S, v20.4S, v7.4S
         sub             v21.4S, v21.4S, v7.4S
@@ -181,17 +181,17 @@ function idct_col4_neon\i
         cmp             x5, #0
         beq             2f
 
-        smlal\i         v17.4S, v29.\l, z5
-        smlsl\i         v18.4S, v29.\l, z1
-        smlal\i         v5.4S,  v29.\l, z7
-        smlal\i         v6.4S,  v29.\l, z3
+        smlal\i         v17.4S, v29\l, z5
+        smlsl\i         v18.4S, v29\l, z1
+        smlal\i         v5.4S,  v29\l, z7
+        smlal\i         v6.4S,  v29\l, z3
 
 2:      mov             x5, v31.D[\i - 1]
         cmp             x4, #0
         beq             3f
 
-        smull\i         v7.4S,  v30.\l, z6
-        smull\i         v16.4S, v30.\l, z2
+        smull\i         v7.4S,  v30\l, z6
+        smull\i         v16.4S, v30\l, z2
         add             v19.4S, v19.4S, v7.4S
         sub             v22.4S, v22.4S, v7.4S
         sub             v20.4S, v20.4S, v16.4S
@@ -200,10 +200,10 @@ function idct_col4_neon\i
 3:      cmp             x5, #0
         beq             4f
 
-        smlal\i         v17.4S, v31.\l, z7
-        smlsl\i         v18.4S, v31.\l, z5
-        smlal\i         v5.4S,  v31.\l, z3
-        smlsl\i         v6.4S,  v31.\l, z1
+        smlal\i         v17.4S, v31\l, z7
+        smlsl\i         v18.4S, v31\l, z5
+        smlal\i         v5.4S,  v31\l, z3
+        smlsl\i         v6.4S,  v31\l, z1
 
 4:      addhn           v7.4H, v19.4S, v17.4S
         addhn2          v7.8H, v20.4S, v18.4S
@@ -219,14 +219,14 @@ function idct_col4_neon\i
 endfunc
 .endm
 
-declare_idct_col4_neon 1 4H
-declare_idct_col4_neon 2 8H
+declare_idct_col4_neon 1, .4H
+declare_idct_col4_neon 2, .8H
 
 function ff_simple_idct_put_neon, export=1
         idct_start      x2
 
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
         sqshrun         v1.8B,  v7.8H, #COL_SHIFT-16
@@ -263,8 +263,8 @@ endfunc
 function ff_simple_idct_add_neon, export=1
         idct_start      x2
 
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         bl              idct_col4_neon1
 
         sshr            v1.8H, V7.8H, #COL_SHIFT-16
@@ -328,8 +328,8 @@ function ff_simple_idct_neon, export=1
         idct_start      x0
 
         mov             x2, x0
-        idct_row4_neon  v24 v25 v26 v27 1
-        idct_row4_neon  v28 v29 v30 v31 2
+        idct_row4_neon  v24, v25, v26, v27, 1
+        idct_row4_neon  v28, v29, v30, v31, 2
         add             x2, x2, #-128
 
         bl              idct_col4_neon1