lavc/vp8dsp: use saturating add/sub for R-V V DC add

T-Head C908 (cycles):
vp7_idct_dc_add_c:          108.5
vp7_idct_dc_add_rvv_i32:     56.2 (before)
vp7_idct_dc_add_rvv_i32:     47.2 (after)
vp8_idct_dc_add_c:           96.2
vp8_idct_dc_add_rvv_i32:     43.0 (before)
vp8_idct_dc_add_rvv_i32:     34.0 (after)
This commit is contained in:
Rémi Denis-Courmont 2024-07-25 17:40:26 +03:00
parent bbfc0ac9ca
commit 9b4655c3a1
1 changed file with 12 additions and 6 deletions

View File

@@ -172,12 +172,18 @@ func ff_vp78_idct_dc_add_rvv, zve32x
vsetivli zero, 4, e8, mf4, ta, ma
sh zero, (a1)
vlse32.v v8, (a0), a2
vsetivli zero, 16, e16, m2, ta, ma
vzext.vf2 v16, v8
vadd.vx v16, v16, a3
vmax.vx v16, v16, zero
vsetvli zero, zero, e8, m1, ta, ma
vnclipu.wi v8, v16, 0
vsetivli zero, 16, e8, m1, ta, ma
bgez a3, 1f
# block[0] < 0
neg a3, a3
vssubu.vx v8, v8, a3
vsetivli zero, 4, e8, mf4, ta, ma
vsse32.v v8, (a0), a2
ret
1: # block[0] >= 0
vsaddu.vx v8, v8, a3
vsetivli zero, 4, e8, mf4, ta, ma
vsse32.v v8, (a0), a2
ret