mirror of https://git.ffmpeg.org/ffmpeg.git
aarch64/vvc: Add dmvr
dmvr_8_12x20_c:      1.5 ( 1.00x)
dmvr_8_12x20_neon:   0.2 ( 6.56x)
dmvr_8_20x12_c:      1.0 ( 1.00x)
dmvr_8_20x12_neon:   0.2 ( 4.33x)
dmvr_8_20x20_c:      1.7 ( 1.00x)
dmvr_8_20x20_neon:   0.5 ( 3.63x)
dmvr_12_12x20_c:     2.2 ( 1.00x)
dmvr_12_12x20_neon:  0.5 ( 4.68x)
dmvr_12_20x12_c:     2.0 ( 1.00x)
dmvr_12_20x12_neon:  0.5 ( 4.16x)
dmvr_12_20x20_c:     3.7 ( 1.00x)
dmvr_12_20x20_neon:  0.7 ( 5.14x)

Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
This commit is contained in:
parent
bcd65ebd8f
commit
5988a2729b
|
@ -90,6 +90,8 @@ W_AVG_FUN(12)
|
|||
const uint8_t *_src, ptrdiff_t _src_stride, int height, \
|
||||
intptr_t mx, intptr_t my, int width);
|
||||
|
||||
DMVR_FUN(, 8)
|
||||
DMVR_FUN(, 12)
|
||||
DMVR_FUN(hv_, 8)
|
||||
DMVR_FUN(hv_, 10)
|
||||
DMVR_FUN(hv_, 12)
|
||||
|
@ -166,6 +168,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
|
|||
|
||||
c->inter.avg = ff_vvc_avg_8_neon;
|
||||
c->inter.w_avg = vvc_w_avg_8;
|
||||
c->inter.dmvr[0][0] = ff_vvc_dmvr_8_neon;
|
||||
c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_8_neon;
|
||||
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++)
|
||||
|
@ -215,6 +218,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
|
|||
} else if (bd == 12) {
|
||||
c->inter.avg = ff_vvc_avg_12_neon;
|
||||
c->inter.w_avg = vvc_w_avg_12;
|
||||
c->inter.dmvr[0][0] = ff_vvc_dmvr_12_neon;
|
||||
c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_12_neon;
|
||||
|
||||
c->alf.filter[LUMA] = alf_filter_luma_12_neon;
|
||||
|
|
|
@ -235,7 +235,7 @@ vvc_avg w_avg, 12
|
|||
* x5: intptr_t my
|
||||
* w6: int width
|
||||
*/
|
||||
function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
function ff_vvc_dmvr_8_neon, export=1
        // Widen one block of 8-bit samples to 16 bits, scaling each sample
        // by << 2, and store the result into the int16_t destination.
        //
        // Each row converts 12 samples when width <= 16 and 20 samples when
        // width > 16 (an 8- or 16-sample body followed by a 4-sample tail).
        // mx and my are part of the common prototype but are not read here.
        //
        // The register aliases below are left defined on purpose: the
        // functions that follow in this file use them as well.
        dst             .req x0
        src             .req x1
        src_stride      .req x2
        height          .req w3
        mx              .req x4
        my              .req x5
        width           .req w6

        sxtw            x6, w6
        mov             x7, #(VVC_MAX_PB_SIZE * 2 + 8)
        cmp             width, #16
        // src is post-incremented by the loads, so only the remainder of
        // the source stride has to be added at the end of a row.
        sub             src_stride, src_stride, x6
        cset            w15, gt                     // width > 16
        // x7 is the post-increment of the final 4-sample store. For a
        // width of 12 or 20 it makes dst advance by exactly
        // VVC_MAX_PB_SIZE * 2 bytes per row.
        sub             x7, x7, x6, lsl #1
1:
        cbz             w15, 2f
        // width > 16: first 16 samples of the row
        ldr             q0, [src], #16
        ushll           v1.8h, v0.8b, #2            // widen and scale in one step
        ushll2          v2.8h, v0.16b, #2
        stp             q1, q2, [dst], #32
        b               3f
2:
        // width <= 16: first 8 samples of the row
        ldr             d0, [src], #8
        ushll           v1.8h, v0.8b, #2
        str             q1, [dst], #16
3:
        subs            height, height, #1
        // trailing 4 samples; nothing below touches the flags until b.ne
        ldr             s3, [src], #4
        ushll           v4.8h, v3.8b, #2
        st1             {v4.4h}, [dst], x7

        add             src, src, src_stride
        b.ne            1b

        ret
endfunc
|
||||
|
||||
function ff_vvc_dmvr_12_neon, export=1
        // Convert one block of 16-bit samples with a rounding right shift,
        // dst = (s + 2) >> 2, and store the result into the int16_t
        // destination.
        //
        // Each row converts 12 samples when width <= 16 and 20 samples when
        // width > 16 (an 8- or 16-sample body followed by a 4-sample tail).
        // dst, src, src_stride, height and width are the register aliases
        // declared earlier in this file.
        //
        // urshr performs the add-and-shift on the 16-bit lanes without
        // intermediate overflow, so no widening to 32 bits, no saturating
        // narrow and no rounding-constant register are required.
        sxtw            x6, w6
        mov             x7, #(VVC_MAX_PB_SIZE * 2 + 8)
        cmp             width, #16
        // src is post-incremented by the loads (2 bytes per sample), so
        // only the remainder of the source stride is added per row.
        sub             src_stride, src_stride, x6, lsl #1
        cset            w15, gt                     // width > 16
        // x7 is the post-increment of the final 4-sample store. For a
        // width of 12 or 20 it makes dst advance by exactly
        // VVC_MAX_PB_SIZE * 2 bytes per row.
        sub             x7, x7, x6, lsl #1
1:
        cbz             w15, 2f
        // width > 16: first 16 samples of the row
        ldp             q0, q1, [src], #32
        urshr           v0.8h, v0.8h, #2            // (s + 2) >> 2
        urshr           v1.8h, v1.8h, #2
        stp             q0, q1, [dst], #32
        b               3f
2:
        // width <= 16: first 8 samples of the row
        ldr             q0, [src], #16
        urshr           v0.8h, v0.8h, #2
        str             q0, [dst], #16
3:
        subs            height, height, #1
        // trailing 4 samples; nothing below touches the flags until b.ne
        ldr             d0, [src], #8
        urshr           v0.4h, v0.4h, #2
        st1             {v0.4h}, [dst], x7

        add             src, src, src_stride
        b.ne            1b

        ret
endfunc
|
||||
|
||||
function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
tmp0 .req x7
|
||||
tmp1 .req x8
|
||||
|
||||
|
|
Loading…
Reference in New Issue