mirror of https://git.ffmpeg.org/ffmpeg.git
lavc/vc1dsp: use saturating arithmetic for RVV inv_trans_dc
T-Head C908 (cycles):
vc1dsp.vc1_inv_trans_4x4_dc_c:       113.7
vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32:  46.5 (before)
vc1dsp.vc1_inv_trans_4x4_dc_rvv_i32:  45.5 (after)
vc1dsp.vc1_inv_trans_4x8_dc_c:       230.7
vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32:  65.7 (before)
vc1dsp.vc1_inv_trans_4x8_dc_rvv_i32:  52.5 (after)
vc1dsp.vc1_inv_trans_8x4_dc_c:       246.7
vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64:  56.7 (before)
vc1dsp.vc1_inv_trans_8x4_dc_rvv_i64:  45.5 (after)
vc1dsp.vc1_inv_trans_8x8_dc_c:       419.7
vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64:  81.2 (before)
vc1dsp.vc1_inv_trans_8x8_dc_rvv_i64:  53.5 (after)
This commit is contained in:
parent
784a72a116
commit
324eba69f7
|
@ -38,18 +38,20 @@ func ff_vc1_inv_trans_\cols\()x\rows\()_dc_rvv, zve64x, zba
|
|||
.endif
|
||||
addi t2, t2, 64
|
||||
srai t2, t2, 7
|
||||
.if \rows * \cols == 64
|
||||
vsetvli zero, t0, e16, m8, ta, ma
|
||||
.elseif \rows * \cols == 32
|
||||
vsetvli zero, t0, e16, m4, ta, ma
|
||||
.if \rows * \cols >= 32
|
||||
vsetvli zero, t0, e8, m\mat_lmul, ta, ma
|
||||
.else
|
||||
vsetivli zero, \rows * \cols, e16, m2, ta, ma
|
||||
vsetivli zero, \rows * \cols, e8, m\mat_lmul, ta, ma
|
||||
.endif
|
||||
vzext.vf2 v8, v0
|
||||
vadd.vx v8, v8, t2
|
||||
vmax.vx v8, v8, zero
|
||||
vsetvli zero, zero, e8, m\mat_lmul, ta, ma
|
||||
vnclipu.wi v0, v8, 0
|
||||
bgez t2, 1f
|
||||
|
||||
neg t2, t2
|
||||
vssubu.vx v0, v0, t2
|
||||
vsetivli zero, \rows, e8, m\row_lmul, ta, ma
|
||||
vsse\w\().v v0, (a0), a1
|
||||
ret
|
||||
1:
|
||||
vsaddu.vx v0, v0, t2
|
||||
vsetivli zero, \rows, e8, m\row_lmul, ta, ma
|
||||
vsse\w\().v v0, (a0), a1
|
||||
ret
|
||||
|
|
Loading…
Reference in New Issue