From 37cde570bc2dcd64a15c5d9a37b9fa0d78d84f9f Mon Sep 17 00:00:00 2001 From: "J. Dekker" Date: Wed, 22 Mar 2023 00:57:00 +0100 Subject: [PATCH] lavc/aarch64: add clip N macro Signed-off-by: J. Dekker --- libavcodec/aarch64/hevcdsp_idct_neon.S | 19 +++++-------------- libavcodec/aarch64/neon.S | 11 +++++++++++ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S index 467cb0f48a..3e59dd20bb 100644 --- a/libavcodec/aarch64/hevcdsp_idct_neon.S +++ b/libavcodec/aarch64/hevcdsp_idct_neon.S @@ -5,7 +5,7 @@ * * Ported from arm/hevcdsp_idct_neon.S by * Copyright (c) 2020 Reimar Döffinger - * Copyright (c) 2020 J. Dekker + * Copyright (c) 2023 J. Dekker * * This file is part of FFmpeg. * @@ -38,13 +38,6 @@ const trans, align=4 .short 31, 22, 13, 4 endconst -.macro clip2 in1, in2, min, max - smax \in1, \in1, \min - smax \in2, \in2, \min - smin \in1, \in1, \max - smin \in2, \in2, \max -.endm - function ff_hevc_add_residual_4x4_8_neon, export=1 ld1 {v0.8h-v1.8h}, [x1] ld1 {v2.s}[0], [x0], x2 @@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0 ld1 {v3.d}[1], [x12], x2 movi v4.8h, #0 sqadd v1.8h, v1.8h, v3.8h - clip2 v0.8h, v1.8h, v4.8h, v21.8h + clip v4.8h, v21.8h, v0.8h, v1.8h st1 {v0.d}[0], [x0], x2 st1 {v0.d}[1], [x0], x2 st1 {v1.d}[0], [x0], x2 @@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0 sqadd v0.8h, v0.8h, v2.8h ld1 {v3.8h}, [x12] sqadd v1.8h, v1.8h, v3.8h - clip2 v0.8h, v1.8h, v4.8h, v21.8h + clip v4.8h, v21.8h, v0.8h, v1.8h st1 {v0.8h}, [x0], x2 st1 {v1.8h}, [x12], x2 bne 1b @@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0 sqadd v1.8h, v1.8h, v17.8h sqadd v2.8h, v2.8h, v18.8h sqadd v3.8h, v3.8h, v19.8h - clip2 v0.8h, v1.8h, v20.8h, v21.8h - clip2 v2.8h, v3.8h, v20.8h, v21.8h + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h st1 {v0.8h-v1.8h}, [x0], x2 st1 {v2.8h-v3.8h}, [x12], x2 bne 1b @@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0 sqadd v1.8h, v1.8h, v17.8h sqadd v2.8h, v2.8h, v18.8h sqadd v3.8h, v3.8h, v19.8h - clip2 v0.8h, v1.8h, v20.8h, v21.8h - clip2 v2.8h, v3.8h, v20.8h, v21.8h + clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h st1 {v0.8h-v3.8h}, [x0], x2 bne 1b ret diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S index 1ad32c359d..bc105e4861 100644 --- a/libavcodec/aarch64/neon.S +++ b/libavcodec/aarch64/neon.S @@ -1,6 +1,8 @@ /* * This file is part of FFmpeg. * + * Copyright (c) 2023 J. Dekker + * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either @@ -16,6 +18,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +.macro clip min, max, regs:vararg +.irp x, \regs + smax \x, \x, \min +.endr +.irp x, \regs + smin \x, \x, \max +.endr +.endm + .macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 trn1 \r8\().8B, \r0\().8B, \r1\().8B trn2 \r9\().8B, \r0\().8B, \r1\().8B