lavc/vc1dsp: fuse multiply-adds in R-V V inv_trans_4

T-Head C908 (cycles):            before   after
vc1dsp.vc1_inv_trans_4x4_rvv_i32: 128.0   120.0
vc1dsp.vc1_inv_trans_4x8_rvv_i32: 244.0   240.0
vc1dsp.vc1_inv_trans_8x4_rvv_i32: 239.2   235.2
This commit is contained in:
Rémi Denis-Courmont 2024-06-30 14:15:33 +03:00
parent d69e522523
commit 78e1565f84

View File

@@ -194,14 +194,12 @@ func ff_vc1_inv_trans_4_rvv, zve32x
 li t4, 22
 vmul.vx v10, v2, t3
 li t2, 10
-vmul.vx v14, v1, t4
+vmul.vx v26, v1, t4
+vmul.vx v27, v3, t4
 vadd.vv v24, v8, v10 # t1
 vsub.vv v25, v8, v10 # t2
-vmul.vx v16, v3, t2
-vmul.vx v18, v3, t4
-vmul.vx v20, v1, t2
-vadd.vv v26, v14, v16 # t3
-vsub.vv v27, v18, v20 # t4
+vmacc.vx v26, t2, v3 # t3
+vnmsac.vx v27, t2, v1 # t4
 vwadd.vv v8, v24, v26
 vwsub.vv v10, v25, v27
 vwadd.vv v12, v25, v27