mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-11 18:09:36 +00:00
riscv: allow passing addend to vtype_vli macro
A constant (-1) is added to the length value, so we can have an addend for free, and optimise the addition away if the addend is exactly 1.
This commit is contained in:
parent
add8c46215
commit
4fe8f2cc43
@ -87,8 +87,8 @@ func ff_lpc_apply_welch_window_rvv, zve64d
|
||||
endfunc
|
||||
|
||||
func ff_lpc_compute_autocorr_rvv, zve64d, zbb
|
||||
vtype_vli t1, a2, t2, e64, ta, ma, 1
|
||||
addi a2, a2, 1
|
||||
vtype_vli t1, a2, t2, e64, ta, ma
|
||||
li t0, 1
|
||||
vsetvl zero, a2, t1
|
||||
fcvt.d.l ft0, t0
|
||||
|
@ -196,18 +196,21 @@
|
||||
* @param ew element width: e8, e16, e32 or e64
|
||||
* @param tp tail policy: tu or ta
|
||||
* @param mp mask policy: mu or ma
|
||||
* @param addend optional addend for the vector length register
|
||||
*/
|
||||
.macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
|
||||
.macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
|
||||
parse_vtype \ew, \tp, \mp
|
||||
/*
|
||||
* The difference between the CLZ's notionally equals the VLMUL value
|
||||
* for 4-bit elements. But we want the value for SEW_MAX-bit elements.
|
||||
*/
|
||||
slli \tmp, \rs, 1 + VSEW_MAX
|
||||
.if \addend - 1
|
||||
addi \tmp, \tmp, \addend - 1
|
||||
.endif
|
||||
csrr \rd, vlenb
|
||||
addi \tmp, \tmp, -1
|
||||
clz \rd, \rd
|
||||
clz \tmp, \tmp
|
||||
clz \rd, \rd
|
||||
sub \rd, \rd, \tmp
|
||||
max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
|
||||
.if vsew < VSEW_MAX
|
||||
|
Loading…
Reference in New Issue
Block a user