mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-25 00:32:31 +00:00
lavc/aarch64: add clip N macro
Signed-off-by: J. Dekker <jdek@itanimul.li>
This commit is contained in:
parent
9413bdc381
commit
37cde570bc
@ -5,7 +5,7 @@
|
|||||||
*
|
*
|
||||||
* Ported from arm/hevcdsp_idct_neon.S by
|
* Ported from arm/hevcdsp_idct_neon.S by
|
||||||
* Copyright (c) 2020 Reimar Döffinger
|
* Copyright (c) 2020 Reimar Döffinger
|
||||||
* Copyright (c) 2020 J. Dekker
|
* Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
|
||||||
*
|
*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
@ -38,13 +38,6 @@ const trans, align=4
|
|||||||
.short 31, 22, 13, 4
|
.short 31, 22, 13, 4
|
||||||
endconst
|
endconst
|
||||||
|
|
||||||
.macro clip2 in1, in2, min, max
|
|
||||||
smax \in1, \in1, \min
|
|
||||||
smax \in2, \in2, \min
|
|
||||||
smin \in1, \in1, \max
|
|
||||||
smin \in2, \in2, \max
|
|
||||||
.endm
|
|
||||||
|
|
||||||
function ff_hevc_add_residual_4x4_8_neon, export=1
|
function ff_hevc_add_residual_4x4_8_neon, export=1
|
||||||
ld1 {v0.8h-v1.8h}, [x1]
|
ld1 {v0.8h-v1.8h}, [x1]
|
||||||
ld1 {v2.s}[0], [x0], x2
|
ld1 {v2.s}[0], [x0], x2
|
||||||
@ -182,7 +175,7 @@ function hevc_add_residual_4x4_16_neon, export=0
|
|||||||
ld1 {v3.d}[1], [x12], x2
|
ld1 {v3.d}[1], [x12], x2
|
||||||
movi v4.8h, #0
|
movi v4.8h, #0
|
||||||
sqadd v1.8h, v1.8h, v3.8h
|
sqadd v1.8h, v1.8h, v3.8h
|
||||||
clip2 v0.8h, v1.8h, v4.8h, v21.8h
|
clip v4.8h, v21.8h, v0.8h, v1.8h
|
||||||
st1 {v0.d}[0], [x0], x2
|
st1 {v0.d}[0], [x0], x2
|
||||||
st1 {v0.d}[1], [x0], x2
|
st1 {v0.d}[1], [x0], x2
|
||||||
st1 {v1.d}[0], [x0], x2
|
st1 {v1.d}[0], [x0], x2
|
||||||
@ -201,7 +194,7 @@ function hevc_add_residual_8x8_16_neon, export=0
|
|||||||
sqadd v0.8h, v0.8h, v2.8h
|
sqadd v0.8h, v0.8h, v2.8h
|
||||||
ld1 {v3.8h}, [x12]
|
ld1 {v3.8h}, [x12]
|
||||||
sqadd v1.8h, v1.8h, v3.8h
|
sqadd v1.8h, v1.8h, v3.8h
|
||||||
clip2 v0.8h, v1.8h, v4.8h, v21.8h
|
clip v4.8h, v21.8h, v0.8h, v1.8h
|
||||||
st1 {v0.8h}, [x0], x2
|
st1 {v0.8h}, [x0], x2
|
||||||
st1 {v1.8h}, [x12], x2
|
st1 {v1.8h}, [x12], x2
|
||||||
bne 1b
|
bne 1b
|
||||||
@ -221,8 +214,7 @@ function hevc_add_residual_16x16_16_neon, export=0
|
|||||||
sqadd v1.8h, v1.8h, v17.8h
|
sqadd v1.8h, v1.8h, v17.8h
|
||||||
sqadd v2.8h, v2.8h, v18.8h
|
sqadd v2.8h, v2.8h, v18.8h
|
||||||
sqadd v3.8h, v3.8h, v19.8h
|
sqadd v3.8h, v3.8h, v19.8h
|
||||||
clip2 v0.8h, v1.8h, v20.8h, v21.8h
|
clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
|
||||||
clip2 v2.8h, v3.8h, v20.8h, v21.8h
|
|
||||||
st1 {v0.8h-v1.8h}, [x0], x2
|
st1 {v0.8h-v1.8h}, [x0], x2
|
||||||
st1 {v2.8h-v3.8h}, [x12], x2
|
st1 {v2.8h-v3.8h}, [x12], x2
|
||||||
bne 1b
|
bne 1b
|
||||||
@ -239,8 +231,7 @@ function hevc_add_residual_32x32_16_neon, export=0
|
|||||||
sqadd v1.8h, v1.8h, v17.8h
|
sqadd v1.8h, v1.8h, v17.8h
|
||||||
sqadd v2.8h, v2.8h, v18.8h
|
sqadd v2.8h, v2.8h, v18.8h
|
||||||
sqadd v3.8h, v3.8h, v19.8h
|
sqadd v3.8h, v3.8h, v19.8h
|
||||||
clip2 v0.8h, v1.8h, v20.8h, v21.8h
|
clip v20.8h, v21.8h, v0.8h, v1.8h, v2.8h, v3.8h
|
||||||
clip2 v2.8h, v3.8h, v20.8h, v21.8h
|
|
||||||
st1 {v0.8h-v3.8h}, [x0], x2
|
st1 {v0.8h-v3.8h}, [x0], x2
|
||||||
bne 1b
|
bne 1b
|
||||||
ret
|
ret
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* This file is part of FFmpeg.
|
* This file is part of FFmpeg.
|
||||||
*
|
*
|
||||||
|
* Copyright (c) 2023 J. Dekker <jdek@itanimul.li>
|
||||||
|
*
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
* License as published by the Free Software Foundation; either
|
* License as published by the Free Software Foundation; either
|
||||||
@ -16,6 +18,15 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
.macro clip min, max, regs:vararg
|
||||||
|
.irp x, \regs
|
||||||
|
smax \x, \x, \min
|
||||||
|
.endr
|
||||||
|
.irp x, \regs
|
||||||
|
smin \x, \x, \max
|
||||||
|
.endr
|
||||||
|
.endm
|
||||||
|
|
||||||
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
.macro transpose_8x8B r0, r1, r2, r3, r4, r5, r6, r7, r8, r9
|
||||||
trn1 \r8\().8B, \r0\().8B, \r1\().8B
|
trn1 \r8\().8B, \r0\().8B, \r1\().8B
|
||||||
trn2 \r9\().8B, \r0\().8B, \r1\().8B
|
trn2 \r9\().8B, \r0\().8B, \r1\().8B
|
||||||
|
Loading…
Reference in New Issue
Block a user