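lavu/float_dsp: unroll RISC-V V loops

Raise the vector register group multiplier (LMUL) in each loop's vsetvli
from m1 to m8 (m4 in ff_vector_fmul_window_rvv), so that each iteration can
process up to eight (respectively four) times as many elements.

Benchmark results (lower is better; no figures are listed for
vector_fmul_window):
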
butterflies_float_c: 1057.0
butterflies_float_rvv_f32: 351.0 (before)
butterflies_float_rvv_f32: 329.5 (after)

vector_dmac_scalar_c: 819.0
vector_dmac_scalar_rvv_f64: 670.5 (before)
vector_dmac_scalar_rvv_f64: 431.0 (after)

vector_dmul_c: 800.2
vector_dmul_rvv_f64: 541.5 (before)
vector_dmul_rvv_f64: 426.0 (after)

vector_dmul_scalar_c: 545.7
vector_dmul_scalar_rvv_f64: 670.7 (before)
vector_dmul_scalar_rvv_f64: 324.7 (after)

vector_fmac_scalar_c: 804.5
vector_fmac_scalar_rvv_f32: 412.7 (before)
vector_fmac_scalar_rvv_f32: 214.5 (after)

vector_fmul_c: 811.2
vector_fmul_rvv_f32: 285.7 (before)
vector_fmul_rvv_f32: 214.2 (after)

vector_fmul_add_c: 1313.0
vector_fmul_add_rvv_f32: 349.0 (before)
vector_fmul_add_rvv_f32: 290.2 (after)

vector_fmul_reverse_c: 815.7
vector_fmul_reverse_rvv_f32: 529.2 (before)
vector_fmul_reverse_rvv_f32: 515.7 (after)

vector_fmul_scalar_c: 546.0
vector_fmul_scalar_rvv_f32: 350.2 (before)
vector_fmul_scalar_rvv_f32: 169.5 (after)
This commit is contained in:
Rémi Denis-Courmont 2023-07-17 19:33:09 +03:00
parent effadce6c7
commit b710f881ce
1 changed file with 10 additions and 10 deletions

View File

@ -23,7 +23,7 @@
// (a0) = (a1) * (a2) [0..a3-1] // (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_fmul_rvv, zve32f func ff_vector_fmul_rvv, zve32f
1: 1:
vsetvli t0, a3, e32, m1, ta, ma vsetvli t0, a3, e32, m8, ta, ma
vle32.v v16, (a1) vle32.v v16, (a1)
sub a3, a3, t0 sub a3, a3, t0
vle32.v v24, (a2) vle32.v v24, (a2)
@ -42,7 +42,7 @@ func ff_vector_fmac_scalar_rvv, zve32f
NOHWF fmv.w.x fa0, a2 NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3 NOHWF mv a2, a3
1: 1:
vsetvli t0, a2, e32, m1, ta, ma vsetvli t0, a2, e32, m8, ta, ma
slli t1, t0, 2 slli t1, t0, 2
vle32.v v24, (a1) vle32.v v24, (a1)
sub a2, a2, t0 sub a2, a2, t0
@ -61,7 +61,7 @@ func ff_vector_fmul_scalar_rvv, zve32f
NOHWF fmv.w.x fa0, a2 NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3 NOHWF mv a2, a3
1: 1:
vsetvli t0, a2, e32, m1, ta, ma vsetvli t0, a2, e32, m8, ta, ma
vle32.v v16, (a1) vle32.v v16, (a1)
sub a2, a2, t0 sub a2, a2, t0
vfmul.vf v16, v16, fa0 vfmul.vf v16, v16, fa0
@ -82,7 +82,7 @@ func ff_vector_fmul_window_rvv, zve32f
sh2add t3, t1, a3 sh2add t3, t1, a3
li t1, -4 // byte stride li t1, -4 // byte stride
1: 1:
vsetvli t2, a4, e32, m1, ta, ma vsetvli t2, a4, e32, m4, ta, ma
vle32.v v16, (a1) vle32.v v16, (a1)
slli t4, t2, 2 slli t4, t2, 2
vlse32.v v20, (a2), t1 vlse32.v v20, (a2), t1
@ -109,7 +109,7 @@ endfunc
// (a0) = (a1) * (a2) + (a3) [0..a4-1] // (a0) = (a1) * (a2) + (a3) [0..a4-1]
func ff_vector_fmul_add_rvv, zve32f func ff_vector_fmul_add_rvv, zve32f
1: 1:
vsetvli t0, a4, e32, m1, ta, ma vsetvli t0, a4, e32, m8, ta, ma
vle32.v v8, (a1) vle32.v v8, (a1)
sub a4, a4, t0 sub a4, a4, t0
vle32.v v16, (a2) vle32.v v16, (a2)
@ -131,7 +131,7 @@ func ff_vector_fmul_reverse_rvv, zve32f
li t2, -4 // byte stride li t2, -4 // byte stride
addi a2, a2, -4 addi a2, a2, -4
1: 1:
vsetvli t0, a3, e32, m1, ta, ma vsetvli t0, a3, e32, m8, ta, ma
slli t1, t0, 2 slli t1, t0, 2
vle32.v v16, (a1) vle32.v v16, (a1)
sub a3, a3, t0 sub a3, a3, t0
@ -149,7 +149,7 @@ endfunc
// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
func ff_butterflies_float_rvv, zve32f func ff_butterflies_float_rvv, zve32f
1: 1:
vsetvli t0, a2, e32, m1, ta, ma vsetvli t0, a2, e32, m8, ta, ma
vle32.v v16, (a0) vle32.v v16, (a0)
sub a2, a2, t0 sub a2, a2, t0
vle32.v v24, (a1) vle32.v v24, (a1)
@ -187,7 +187,7 @@ endfunc
// (a0) = (a1) * (a2) [0..a3-1] // (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d func ff_vector_dmul_rvv, zve64d
1: 1:
vsetvli t0, a3, e64, m1, ta, ma vsetvli t0, a3, e64, m8, ta, ma
vle64.v v16, (a1) vle64.v v16, (a1)
sub a3, a3, t0 sub a3, a3, t0
vle64.v v24, (a2) vle64.v v24, (a2)
@ -206,7 +206,7 @@ func ff_vector_dmac_scalar_rvv, zve64d
NOHWD fmv.d.x fa0, a2 NOHWD fmv.d.x fa0, a2
NOHWD mv a2, a3 NOHWD mv a2, a3
1: 1:
vsetvli t0, a2, e64, m1, ta, ma vsetvli t0, a2, e64, m8, ta, ma
vle64.v v24, (a1) vle64.v v24, (a1)
sub a2, a2, t0 sub a2, a2, t0
vle64.v v16, (a0) vle64.v v16, (a0)
@ -224,7 +224,7 @@ func ff_vector_dmul_scalar_rvv, zve64d
NOHWD fmv.d.x fa0, a2 NOHWD fmv.d.x fa0, a2
NOHWD mv a2, a3 NOHWD mv a2, a3
1: 1:
vsetvli t0, a2, e64, m1, ta, ma vsetvli t0, a2, e64, m8, ta, ma
vle64.v v16, (a1) vle64.v v16, (a1)
sub a2, a2, t0 sub a2, a2, t0
vfmul.vf v16, v16, fa0 vfmul.vf v16, v16, fa0