riscv: allow passing addend to vtype_vli macro

The macro already adds a constant (-1) to the shifted length value, so an addend can be folded into that constant for free; when the addend is exactly 1, the addition is optimised away entirely.
2025-02-15 02:58:01 +00:00 · 2024-05-27 18:51:27 +03:00 · 2024-05-27 18:51:27 +03:00 · 4fe8f2cc43
commit 4fe8f2cc43
parent add8c46215
2 changed files with 7 additions and 4 deletions
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@ -87,8 +87,8 @@ func ff_lpc_apply_welch_window_rvv, zve64d
 endfunc

 func ff_lpc_compute_autocorr_rvv, zve64d, zbb
+        vtype_vli t1, a2, t2, e64, ta, ma, 1
        addi      a2, a2, 1
-        vtype_vli t1, a2, t2, e64, ta, ma
        li        t0, 1
        vsetvl    zero, a2, t1
        fcvt.d.l  ft0, t0
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@ -196,18 +196,21 @@
         * @param ew element width: e8, e16, e32 or e64
         * @param tp tail policy: tu or ta
         * @param mp mask policty: mu or ma
+         * @param addend optional addend for the vector length register
         */
-        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
+        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
        parse_vtype \ew, \tp, \mp
        /*
         * The difference between the CLZ's notionally equals the VLMUL value
         * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
         */
        slli    \tmp, \rs, 1 + VSEW_MAX
+        .if \addend - 1
+        addi    \tmp, \tmp, \addend - 1
+        .endif
        csrr    \rd, vlenb
-        addi    \tmp, \tmp, -1
-        clz     \rd, \rd
        clz     \tmp, \tmp
+        clz     \rd, \rd
        sub     \rd, \rd, \tmp
        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
        .if     vsew < VSEW_MAX