lavc/vvc_mc: R-V V dmvr

k230               banana_f3
dmvr_8_12x20_c:                       619.3 ( 1.00x)    624.1 ( 1.00x)
dmvr_8_12x20_rvv_i32:                 128.6 ( 4.82x)    103.4 ( 6.04x)
dmvr_8_20x12_c:                       610.0 ( 1.00x)    665.6 ( 1.00x)
dmvr_8_20x12_rvv_i32:                 137.6 ( 4.44x)    92.9 ( 7.17x)
dmvr_8_20x20_c:                      1008.0 ( 1.00x)    1082.7 ( 1.00x)
dmvr_8_20x20_rvv_i32:                 221.1 ( 4.56x)    155.4 ( 6.97x)
dmvr_h_8_12x20_c:                    2008.0 ( 1.00x)    2009.7 ( 1.00x)
dmvr_h_8_12x20_rvv_i32:               239.6 ( 8.38x)    186.7 (10.77x)
dmvr_h_8_20x12_c:                    1989.5 ( 1.00x)    2009.4 ( 1.00x)
dmvr_h_8_20x12_rvv_i32:               230.3 ( 8.64x)    155.4 (12.93x)
dmvr_h_8_20x20_c:                    3304.1 ( 1.00x)    3342.9 ( 1.00x)
dmvr_h_8_20x20_rvv_i32:               378.3 ( 8.73x)    248.9 (13.43x)
dmvr_hv_8_12x20_c:                   3609.8 ( 1.00x)    3603.4 ( 1.00x)
dmvr_hv_8_12x20_rvv_i32:              369.1 ( 9.78x)    322.1 (11.19x)
dmvr_hv_8_20x12_c:                   3628.3 ( 1.00x)    3624.2 ( 1.00x)
dmvr_hv_8_20x12_rvv_i32:              322.8 (11.24x)    238.7 (15.19x)
dmvr_hv_8_20x20_c:                   5933.8 ( 1.00x)    5936.6 ( 1.00x)
dmvr_hv_8_20x20_rvv_i32:              526.5 (11.27x)    374.1 (15.87x)
dmvr_v_8_12x20_c:                    2156.3 ( 1.00x)    2155.4 ( 1.00x)
dmvr_v_8_12x20_rvv_i32:               239.6 ( 9.00x)    176.2 (12.24x)
dmvr_v_8_20x12_c:                    2137.6 ( 1.00x)    2165.9 ( 1.00x)
dmvr_v_8_20x12_rvv_i32:               230.3 ( 9.28x)    155.2 (13.96x)
dmvr_v_8_20x20_c:                    4183.8 ( 1.00x)    3592.9 ( 1.00x)
dmvr_v_8_20x20_rvv_i32:               369.3 (11.33x)    249.2 (14.42x)
This commit is contained in:
sunyuechi 2024-12-15 23:56:02 +08:00 committed by Nuo Mi
parent b86766d610
commit 89df9c4404
2 changed files with 144 additions and 0 deletions

View File

@ -285,3 +285,125 @@ endfunc
func_w_avg 128
func_w_avg 256
#endif
// Unfiltered DMVR path: widens 8-bit source pixels to 16 bits, scaled by 4.
// Reached with "j dmvr" from the func_dmvr entry points, which execute
// vsetvlstatic8 first, so vl/vtype are already configured on entry.
// Arguments, per the C prototype order: a0 = dst, a1 = src,
// a2 = src_stride, a3 = height.
// Two rows are processed per iteration and the loop only exits when a3
// reaches exactly zero. NOTE(review): assumes height is even and non-zero.
func dmvr zve32x, zbb, zba
lpad 0
li t0, 4 // scale factor for the widening multiply
1:
add t1, a1, a2 // t1 = second source row
addi t4, a0, 128*2 // t4 = second destination row (rows are 128*2 bytes apart)
vle8.v v0, (a1)
vle8.v v4, (t1)
addi a3, a3, -2 // two rows consumed
vwmulu.vx v16, v0, t0 // 8-bit * 4 -> 16-bit
vwmulu.vx v20, v4, t0
vse16.v v16, (a0) // 16-bit store, element width comes from the instruction
vse16.v v20, (t4)
sh1add a1, a2, a1 // src += 2 * src_stride
add a0, a0, 128*2*2 // dst += 2 rows
bnez a3, 1b
ret
endfunc
// Emits one 2-tap DMVR filter function (name: dmvr_, then type, vlen, w).
//   mn   - argument register holding the filter index (a4 = mx for h,
//          a5 = my for v, as passed by the instantiations of this macro)
//   type - h (taps applied to x and x + 1) or v (taps applied to row y and y + 1)
//   w    - number of pixels per row, vlen - vector register length in bits
// Arguments, per the C prototype order: a0 = dst, a1 = src,
// a2 = src_stride, a3 = height.
// Result per pixel is (tap0 * p0 + tap1 * p1) rounded and shifted right by 2;
// the rounding mode is taken from vxrm, which the func_dmvr entry point sets
// before jumping here.
// Two rows are produced per iteration and the loop only exits when a3
// reaches exactly zero. NOTE(review): assumes height is even and non-zero.
// Clobbers t0-t6, v0, v4, v8, v12, v16 (h only), v24 register groups.
.macro dmvr_h_v mn, type, w, vlen
func dmvr_\type\vlen\w, zve32x, zbb, zba
        lla               t4, ff_vvc_inter_luma_dmvr_filters
        sh1add            t4, \mn, t4           // two bytes (taps) per filter entry
        lbu               t5, (t4)              // tap 0
        lbu               t6, 1(t4)             // tap 1
1:
        vsetvlstatic8     \w, \vlen
.ifc \type,h
        // Horizontal filtering: every output row depends only on its own
        // source row, so each of the two rows needs its own x / x + 1 pair.
        addi              t0, a1, 1             // row 0, x + 1
        add               t1, a1, a2            // row 1, x
        addi              t3, t1, 1             // row 1, x + 1
        vle8.v            v0, (a1)
        vle8.v            v4, (t0)
        vle8.v            v8, (t1)
        vle8.v            v16, (t3)
.else
        // Vertical filtering: rows y, y + 1, y + 2; the middle row is shared
        // between the two output rows.
        add               t0, a1, a2
        add               t1, t0, a2
        vle8.v            v0, (a1)
        vle8.v            v4, (t0)
        vle8.v            v8, (t1)
.endif
        addi              a3, a3, -2            // two rows consumed
        addi              t2, a0, 128*2         // second destination row
.ifc \type,h
        vwmulu.vx         v12, v0, t5           // row 0: tap0 * src[x]
        vwmulu.vx         v24, v8, t5           // row 1: tap0 * src[x]
        vwmaccu.vx        v12, t6, v4           // row 0: + tap1 * src[x + 1]
        vwmaccu.vx        v24, t6, v16          // row 1: + tap1 * src[x + 1]
.else
        vwmulu.vx         v12, v0, t5           // out 0: tap0 * row y
        vwmulu.vx         v24, v4, t5           // out 1: tap0 * row y + 1
        vwmaccu.vx        v12, t6, v4           // out 0: + tap1 * row y + 1
        vwmaccu.vx        v24, t6, v8           // out 1: + tap1 * row y + 2
.endif
        vsetvlstatic16    \w, \vlen
        vssrl.vi          v12, v12, 2
        vssrl.vi          v24, v24, 2
        vse16.v           v12, (a0)
        vse16.v           v24, (t2)
        add               a0, a0, 128*4         // dst += 2 rows of 128*2 bytes
        sh1add            a1, a2, a1            // src += 2 * src_stride
        bnez              a3, 1b
        ret
endfunc
.endm
// Loads one source row at a1 and applies the horizontal 2-tap filter:
//   dst = (filter0 * src[x] + filter1 * src[x + 1]) rounded, shifted right by 2
// (rounding mode comes from vxrm). The result is 16-bit.
//   dst              - destination vector register (also used as a temporary
//                      for the raw 8-bit row)
//   filter0, filter1 - scalar registers holding the two taps
//   w, vlen          - row width in pixels and vector length in bits
// Clobbers a6, v2 and v4 register groups. Leaves the vector configuration
// as set by vsetvlstatic16 (defined outside this view).
.macro dmvr_load_h dst, filter0, filter1, w, vlen
vsetvlstatic8 \w, \vlen
addi a6, a1, 1 // address of src[x + 1]
vle8.v \dst, (a1)
vle8.v v2, (a6)
vwmulu.vx v4, \dst, \filter0
vwmaccu.vx v4, \filter1, v2
vsetvlstatic16 \w, \vlen
vssrl.vi \dst, v4, 2
.endm
// Emits the combined horizontal + vertical DMVR filter function for one
// width / vector length pair.
// Arguments, per the C prototype order: a0 = dst, a1 = src,
// a2 = src_stride, a3 = height, a4 = mx, a5 = my.
// Each source row is filtered horizontally once (dmvr_load_h) and kept in
// v12; every output row combines the previous and the newly filtered row
// with the vertical taps, so height + 1 source rows are read in total.
// The vertical multiply/accumulate runs with the vector configuration left
// behind by dmvr_load_h. One row is produced per iteration.
// Clobbers t0-t6, a6, and the v2, v4, v12, v28 register groups.
.macro dmvr_hv w, vlen
func dmvr_hv\vlen\w, zve32x, zbb, zba
lla t0, ff_vvc_inter_luma_dmvr_filters
sh1add t1, a4, t0
sh1add t2, a5, t0
lbu t3, (t1) // filter[mx][0]
lbu t4, 1(t1) // filter[mx][1]
lbu t5, (t2) // filter[my][0]
lbu t6, 1(t2) // filter[my][1]
dmvr_load_h v12, t3, t4, \w, \vlen // prime v12 with the first filtered row
add a1, a1, a2
1:
vmul.vx v28, v12, t5 // previous filtered row * filter[my][0]
addi a3, a3, -1
dmvr_load_h v12, t3, t4, \w, \vlen // v12 = next filtered row
vmacc.vx v28, t6, v12 // + next filtered row * filter[my][1]
vssrl.vi v28, v28, 4
vse16.v v28, (a0)
add a1, a1, a2
addi a0, a0, 128*2 // next destination row
bnez a3, 1b
ret
endfunc
.endm
// Emits the exported entry point (ff_vvc_ + name + _8_rvv_ + vlen) for one
// DMVR variant and dispatches on the block width in a6 (seventh argument of
// the C prototype).
// A width of 20 branches to the width-20 stub; every other value falls
// through into the width-12 stub, which is emitted first by the .irp below.
// For the unfiltered variant (name == dmvr) the stub configures the vector
// unit for the width and jumps to the shared dmvr body. For the filtered
// variants it sets vxrm to 0 (round-to-nearest-up in the RVV fixed-point
// rounding modes) and jumps to the width-specific function.
// Clobbers t0.
.macro func_dmvr vlen, name
func ff_vvc_\name\()_8_rvv_\vlen\(), zve32x, zbb, zba
lpad 0
li t0, 20
beq a6, t0, DMVR\name\vlen\()20
.irp w,12,20
DMVR\name\vlen\w:
.ifc \name, dmvr
vsetvlstatic8 \w, \vlen
j \name
.else
csrwi vxrm, 0
j \name\()\vlen\w
.endif
.endr
endfunc
.endm
// Instantiate everything for both supported vector lengths: for each of
// the two block widths (12 and 20) the h, v and hv worker functions, then
// the four exported entry points that dispatch to them.
.irp vlen,256,128
.irp w,12,20
dmvr_h_v a4, h, \w, \vlen // h uses the filter index in a4 (mx)
dmvr_h_v a5, v, \w, \vlen // v uses the filter index in a5 (my)
dmvr_hv \w, \vlen
.endr
func_dmvr \vlen, dmvr
func_dmvr \vlen, dmvr_h
func_dmvr \vlen, dmvr_v
func_dmvr \vlen, dmvr_hv
.endr

View File

@ -37,6 +37,26 @@ void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,
AVG_PROTOTYPES(8, rvv_128)
AVG_PROTOTYPES(8, rvv_256)
/*
 * Declares the four DMVR entry points (unfiltered, h, v, hv) implemented in
 * assembly for one bit depth `bd` and one implementation suffix `opt`.
 * The last prototype line deliberately carries no line continuation, so the
 * macro definition ends there and the invocations below are real expansions.
 */
#define DMVR_PROTOTYPES(bd, opt)                                                            \
void ff_vvc_dmvr_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,       \
     int height, intptr_t mx, intptr_t my, int width);                                      \
void ff_vvc_dmvr_h_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,     \
     int height, intptr_t mx, intptr_t my, int width);                                      \
void ff_vvc_dmvr_v_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,     \
     int height, intptr_t mx, intptr_t my, int width);                                      \
void ff_vvc_dmvr_hv_##bd##_##opt(int16_t *dst, const uint8_t *src, ptrdiff_t src_stride,    \
     int height, intptr_t mx, intptr_t my, int width);

DMVR_PROTOTYPES(8, rvv_128)
DMVR_PROTOTYPES(8, rvv_256)
/*
 * Fills the 2x2 c->inter.dmvr table with the entry points for bit depth `bd`
 * and implementation suffix `opt`. Expects a pointer named `c` in scope.
 * The first index is 1 for the variants that filter vertically (v, hv), the
 * second index is 1 for the variants that filter horizontally (h, hv).
 */
#define DMVR_INIT(bd, opt) do {                                 \
    c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_##bd##_##opt;          \
    c->inter.dmvr[1][0] = ff_vvc_dmvr_v_##bd##_##opt;           \
    c->inter.dmvr[0][1] = ff_vvc_dmvr_h_##bd##_##opt;           \
    c->inter.dmvr[0][0] = ff_vvc_dmvr_##bd##_##opt;             \
} while (0)
void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
{
#if HAVE_RVV
@ -54,6 +74,7 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
# if (__riscv_xlen == 64)
c->inter.w_avg = ff_vvc_w_avg_8_rvv_256;
# endif
DMVR_INIT(8, rvv_256);
break;
default:
break;
@ -65,6 +86,7 @@ void ff_vvc_dsp_init_riscv(VVCDSPContext *const c, const int bd)
# if (__riscv_xlen == 64)
c->inter.w_avg = ff_vvc_w_avg_8_rvv_128;
# endif
DMVR_INIT(8, rvv_128);
break;
default:
break;