lavu/floatdsp: RISC-V V vector_fmul_window

2024-12-25 16:52:31 +00:00 · 2022-09-26 17:52:37 +03:00 · 2022-09-26 17:52:37 +03:00 · b493370662
commit b493370662
parent 9aeb6aca3a
2 changed files with 36 additions and 0 deletions
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@ -31,6 +31,8 @@ void ff_vector_fmac_scalar_rvv(float *dst, const float *src, float mul,
                                int len);
 void ff_vector_fmul_scalar_rvv(float *dst, const float *src, float mul,
                                int len);
 void ff_vector_fmul_window_rvv(float *dst, const float *src0,
                                const float *src1, const float *win, int len);
 void ff_vector_fmul_add_rvv(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);
 void ff_vector_fmul_reverse_rvv(float *dst, const float *src0,
@ -53,6 +55,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
        fdsp->vector_fmul = ff_vector_fmul_rvv;
        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_rvv;
        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_rvv;
        fdsp->vector_fmul_window = ff_vector_fmul_window_rvv;
        fdsp->vector_fmul_add = ff_vector_fmul_add_rvv;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_rvv;
        fdsp->butterflies_float = ff_butterflies_float_rvv;
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@ -74,6 +74,39 @@ NOHWF   mv       a2, a3
        ret
 endfunc
 // Windowed multiply of two float vectors, for i in [0..a4-1]:
 //   (a0)[i]          = (a1)[i] * (a3)[2*a4-1-i] - (a2)[a4-1-i] * (a3)[i]
 //   (a0)[2*a4-1-i]   = (a1)[i] * (a3)[i]        + (a2)[a4-1-i] * (a3)[2*a4-1-i]
 // The code therefore touches indices [0..2*a4-1] of dst and window, and
 // indices [0..a4-1] of src0 and src1.
 func ff_vector_fmul_window_rvv, zve32f
        // a0: dst, a1: src0, a2: src1, a3: window, a4: length
        // Scratch: t0 = descending dst pointer, t1 = stride, t2 = vl,
        //          t3 = descending window pointer, t4 = vl in bytes.
        // NOTE(review): every vector register number used below is a
        // multiple of 4, so a larger LMUL looks possible - benchmark before
        // changing, strided accesses may not scale with LMUL.
        addi       t0, a4, -1                // t0 = length - 1
        add        t1, t0, a4                // t1 = 2 * length - 1
        sh2add     a2, t0, a2                // a2 = &src1[length - 1]
        sh2add     t0, t1, a0                // t0 = &dst[2 * length - 1]
        sh2add     t3, t1, a3                // t3 = &window[2 * length - 1]
        li         t1, -4 // byte stride: negative, so strided accesses step down one float per element
 1:
        // Each pass handles t2 elements from both ends of dst/window.
        // Scalar pointer updates are interleaved with the vector
        // instructions (presumably for scheduling; keep the order).
        vsetvli    t2, a4, e32, m1, ta, ma   // t2 = vl for this pass (32-bit elements, LMUL=1)
        vle32.v    v16, (a1)                 // v16 = src0[i...], ascending
        slli       t4, t2, 2                 // t4 = vl * 4 = bytes consumed per pointer
        vlse32.v   v20, (a2), t1             // v20 = src1[length-1-i...], descending
        sub        a4, a4, t2                // remaining element count
        vle32.v    v24, (a3)                 // v24 = window[i...], ascending
        add        a1, a1, t4
        vlse32.v   v28, (t3), t1             // v28 = window[2*length-1-i...], descending
        sub        a2, a2, t4
        vfmul.vv   v0, v16, v28              // v0 = src0 * window(descending)
        add        a3, a3, t4
        vfmul.vv   v8, v16, v24              // v8 = src0 * window(ascending)
        sub        t3, t3, t4
        vfnmsac.vv v0, v20, v24              // v0 -= src1 * window(ascending)
        vfmacc.vv  v8, v20, v28              // v8 += src1 * window(descending)
        vse32.v    v0, (a0)                  // store first-half outputs, ascending
        add        a0, a0, t4
        vsse32.v   v8, (t0), t1              // store second-half outputs, descending
        sub        t0, t0, t4
        bnez       a4, 1b                    // loop until all elements are consumed
        ret
 endfunc
 // (a0) = (a1) * (a2) + (a3) [0..a4-1]
 func ff_vector_fmul_add_rvv, zve32f
 1: