lavu/float_dsp: R-V V scalarproduct_double

C908:
scalarproduct_double_c:       39.2
scalarproduct_double_rvv_f64: 10.5

X60:
scalarproduct_double_c:       35.0
scalarproduct_double_rvv_f64:  5.2
This commit is contained in:
Rémi Denis-Courmont 2024-05-29 17:57:09 +03:00
parent b114952030
commit 9238f6cb41
2 changed files with 24 additions and 0 deletions

View File

@ -46,6 +46,8 @@ void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
int len);
void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
int len);
double ff_scalarproduct_double_rvv(const double *v1, const double *v2,
size_t len);
av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
{
@ -68,6 +70,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_dmul = ff_vector_dmul_rvv;
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
fdsp->scalarproduct_double = ff_scalarproduct_double_rvv;
}
}
#endif

View File

@ -249,3 +249,24 @@ NOHWD mv a2, a3
ret
endfunc
// double ff_scalarproduct_double_rvv(const double *v1, const double *v2,
//                                    size_t len)
//
// Dot product of two arrays of doubles.
//   In:  a0 = v1, a1 = v2, a2 = len (element count)
//   Out: fa0 = sum of v1[i] * v2[i]; additionally copied to a0 on the
//        NOHWD path (see the end of the function).
//   Clobbers: t0, a0-a2, v0, v8-v31, vl/vtype.
//
// The products are accumulated lane-wise in v8-v15 and summed with an
// unordered reduction at the end, so the rounding may differ from a strictly
// sequential scalar loop.
//
// NOTE(review): the loop is do-while shaped; with len == 0 the single pass
// runs with vl = 0 and the result is the zero seed.
// NOTE(review): SEW=64 floating-point vector arithmetic is specified by
// Zve64d rather than Zve64f; confirm whether the extension tag below should
// be zve64d.
func ff_scalarproduct_double_rvv, zve64f
        vsetvli      t0, zero, e64, m8, ta, ma  // vl = VLMAX
        vmv.v.x      v8, zero     // all accumulator lanes = +0.0
        vmv.s.x      v0, zero     // v0[0] = +0.0, seed of the final reduction
1:
        // Tail-undisturbed: a final pass with a shorter vl must not destroy
        // the partial sums held in the accumulator lanes beyond vl.
        vsetvli      t0, a2, e64, m8, tu, ma
        vle64.v      v16, (a0)
        sub          a2, a2, t0   // elements left
        vle64.v      v24, (a1)
        sh3add       a0, t0, a0   // v1 += vl * sizeof (double)
        vfmacc.vv    v8, v16, v24 // acc[i] += v1[i] * v2[i]
        sh3add       a1, t0, a1   // v2 += vl * sizeof (double)
        bnez         a2, 1b

        // Back to VLMAX so that every accumulator lane takes part in the sum.
        vsetvli      t0, zero, e64, m8, ta, ma
        vfredusum.vs v0, v8, v0   // v0[0] = v0[0] + sum(acc[*])
        vfmv.f.s     fa0, v0
        // NOHWD is defined outside this view; presumably it keeps this line
        // only when doubles are not returned in FP registers. The result is
        // 64 bits wide, so the full register must be moved: fmv.x.w would
        // only copy the low 32 bits of the double.
        // NOTE(review): fmv.x.d requires RV64.
NOHWD   fmv.x.d      a0, fa0
        ret
endfunc