lavc/flacdsp: R-V Zvl256b lpc33

flac_lpc_33_13_c:        499.7
flac_lpc_33_13_rvv_i64:  197.7
flac_lpc_33_16_c:        601.5
flac_lpc_33_16_rvv_i64:  195.2
flac_lpc_33_29_c:       1011.5
flac_lpc_33_29_rvv_i64:  300.7
flac_lpc_33_32_c:       1099.0
flac_lpc_33_32_rvv_i64:  296.7
2024-05-13 23:20:46 +03:00 · 2024-05-13 23:20:46 +03:00 · a535ce2ac0
parent 5ebb071d79
commit a535ce2ac0
2 changed files with 32 additions and 2 deletions
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@ -31,6 +31,8 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
                       int pred_order, int qlevel, int len);
 void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
                              int pred_order, int qlevel, int len);
 void ff_flac_lpc33_rvv(int64_t *, const int32_t *, const int coeffs[32],
                       int pred_order, int qlevel, int len);
 void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
 void ff_flac_wasted33_rvv(int64_t *, const int32_t *, int shift, int len);
 void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
@ -76,9 +78,10 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
 # if (__riscv_xlen >= 64)
            if (flags & AV_CPU_FLAG_RVV_I64) {
-                if (vlenb > 16)
+                if (vlenb > 16) {
                    c->lpc32 = ff_flac_lpc32_rvv_simple;
-                else
+                    c->lpc33 = ff_flac_lpc33_rvv;
                } else
                    c->lpc32 = ff_flac_lpc32_rvv;
            }
 # endif
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@ -103,6 +103,33 @@ func ff_flac_lpc32_rvv_simple, zve64x, zbb
        ret
 endfunc
 /*
  * ff_flac_lpc33_rvv: LPC reconstruction on 64-bit samples.
  *
  * Arguments (RISC-V integer calling convention, matching the C prototype
  * (int64_t *, const int32_t *, const int coeffs[32], int pred_order,
  *  int qlevel, int len)):
  *   a0 = 64-bit sample buffer; its first pred_order entries are read as the
  *        starting history, results are stored from entry pred_order onward
  *   a1 = 32-bit values added to each prediction, read from index pred_order
  *        onward (presumably the residuals - confirm against the caller)
  *   a2 = coefficients, 32-bit each, sign-extended to 64 bits here
  *   a3 = pred_order
  *   a4 = qlevel (arithmetic right-shift amount)
  *   a5 = len
  *
  * For every output sample:
  *   sum  = sum over j < pred_order of coeffs[j] * history[j]   (64-bit)
  *   out  = a1_value + (sum >> qlevel)
  * and the history window then advances by one sample.
  *
  * Clobbers: t0, t1, a0, a1, a5, v0, v8.., v16.., v24.., vl, vtype.
  * NOTE(review): vtype_vli is a macro defined outside this file section; it
  * presumably leaves in t0 a vtype (SEW=64) whose register group is large
  * enough for a3 elements - confirm against its definition.
  */
 func ff_flac_lpc33_rvv, zve64x, zbb
        vtype_vli t0, a3, t1, e64, ta, ma
        vsetvl  zero, a3, t0            // vl = pred_order, 64-bit elements
        vmv.s.x v0, zero                // v0[0] = 0: seed for the reduction
        sh2add  a1, a3, a1              // a1 += 4 * pred_order
        vle32.v v24, (a2)               // load pred_order 32-bit coefficients
        sub     a5, a5, a3              // a5 = number of samples to produce
        // Nothing to produce when len <= pred_order. The loop below tests its
        // counter at the bottom, so entering it with a5 <= 0 would decrement
        // past zero and keep loading/storing far beyond the buffers.
        blez    a5, 2f
        vle64.v v16, (a0)               // v16 = initial history window
        sh3add  a0, a3, a0              // a0 += 8 * pred_order (first output)
        vsext.vf2 v8, v24               // v8 = coefficients widened to 64 bits
 1:
        vmul.vv v24, v8, v16            // per-tap products
        lw      t0, (a1)                // next 32-bit addend, sign-extended
        addi    a1, a1, 4
        vredsum.vs v24, v24, v0         // v24[0] = sum of the products
        addi    a5, a5, -1
        vmv.x.s t1, v24
        sra     t1, t1, a4              // sum >> qlevel (arithmetic)
        add     t0, t0, t1              // reconstructed sample
        vslide1down.vx v16, v16, t0     // drop oldest sample, append new one
        sd      t0, (a0)
        addi    a0, a0, 8
        bnez    a5, 1b
 2:
        ret
 endfunc
 #endif
 func ff_flac_wasted32_rvv, zve32x