lavu/float_dsp: avoid reg-stride in R-V V fmul_window

This commit is contained in:
Rémi Denis-Courmont 2023-09-29 20:31:36 +03:00
parent fe6d46490f
commit 9240035c0e
1 changed file with 27 additions and 22 deletions

View File

@@ -75,32 +75,37 @@ endfunc
func ff_vector_fmul_window_rvv, zve32f                    func ff_vector_fmul_window_rvv, zve32f
// a0: dst, a1: src0, a2: src1, a3: window, a4: length    // a0: dst, a1: src0, a2: src1, a3: window, a4: length
addi t0, a4, -1                                         | vsetvli t0, zero, e16, m4, ta, ma
add t1, t0, a4                                          | sh2add a2, a4, a2
sh2add a2, t0, a2                                       | vid.v v0
sh2add t0, t1, a0                                       | sh3add t3, a4, a3
sh2add t3, t1, a3                                       | vadd.vi v0, v0, 1
li t1, -4 // byte stride                                | sh3add t0, a4, a0
1:                                                        1:
vsetvli t2, a4, e32, m4, ta, ma                         | vsetvli t2, a4, e16, m2, ta, ma
vle32.v v16, (a1)                                       <
slli t4, t2, 2                                            slli t4, t2, 2
vlse32.v v20, (a2), t1                                  | vrsub.vx v2, v0, t2
sub a4, a4, t2                                          <
vle32.v v24, (a3)                                       <
add a1, a1, t4                                          <
vlse32.v v28, (t3), t1                                  <
sub a2, a2, t4                                          <
vfmul.vv v0, v16, v28                                   <
add a3, a3, t4                                          <
vfmul.vv v8, v16, v24                                   <
sub t3, t3, t4                                            sub t3, t3, t4
vfnmsac.vv v0, v20, v24                                 | vsetvli zero, zero, e32, m4, ta, ma
vfmacc.vv v8, v20, v28                                  | sub a2, a2, t4
vse32.v v0, (a0)                                        | vle32.v v8, (t3)
add a0, a0, t4                                          <
vsse32.v v8, (t0), t1                                   <
sub t0, t0, t4                                            sub t0, t0, t4
                                                        > vle32.v v4, (a2)
                                                        > sub a4, a4, t2
                                                        > vrgatherei16.vv v28, v8, v2
                                                        > vle32.v v16, (a1)
                                                        > add a1, a1, t4
                                                        > vrgatherei16.vv v20, v4, v2
                                                        > vle32.v v24, (a3)
                                                        > add a3, a3, t4
                                                        > vfmul.vv v12, v16, v28
                                                        > vfmul.vv v16, v16, v24
                                                        > vfnmsac.vv v12, v20, v24
                                                        > vfmacc.vv v16, v20, v28
                                                        > vrgatherei16.vv v8, v16, v2
                                                        > vse32.v v12, (a0)
                                                        > add a0, a0, t4
                                                        > vse32.v v8, (t0)
bnez a4, 1b                                               bnez a4, 1b
ret                                                       ret