lavc/aacpsdsp: RISC-V V stereo_interpolate[0]

This commit is contained in:
Rémi Denis-Courmont 2022-09-26 17:52:51 +03:00 committed by Lynne
parent a15edb0bc0
commit c03f9654c9
2 changed files with 60 additions and 0 deletions

View File

@ -34,6 +34,9 @@ void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
int i, int len);
/* RISC-V Vector stereo interpolation; the body is written in assembly.
 * It reads the first four floats of h and of h_step, and rewrites len
 * (re, im) float pairs of both l and r in place. */
void ff_ps_stereo_interpolate_rvv(float (*l)[2], float (*r)[2],
float h[2][4], float h_step[2][4], int len);
av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
{
#if HAVE_RVV
@ -43,6 +46,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
c->add_squares = ff_ps_add_squares_rvv;
c->mul_pair_single = ff_ps_mul_pair_single_rvv;
c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
c->stereo_interpolate[0] = ff_ps_stereo_interpolate_rvv;
}
if (flags & AV_CPU_FLAG_RVV_I32) {

View File

@ -219,3 +219,59 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve32x
3:
ret
endfunc
// void ff_ps_stereo_interpolate_rvv(float (*l)[2], float (*r)[2],
//                                   float h[2][4], float h_step[2][4],
//                                   int len)
//
// For every sample n (0-based), with hk = h[0][k] + (n + 1) * h_step[0][k]:
//     l[n] = h0 * l[n] + h2 * r[n]
//     r[n] = h1 * l[n] + h3 * r[n]      (using the old l[n])
// applied separately to the real and the imaginary part.
//
// In:    a0 = l, a1 = r   interleaved (re, im) floats, rewritten in place
//        a2 = h           only the first four floats are read
//        a3 = h_step      only the first four floats are read
//        a4 = len         number of (re, im) pairs
// Clobb: t0, t1, ft0-ft4, v8-v24, vl, vtype
//
// Needs only the base ISA plus zve32f: pointer stepping uses slli/add rather
// than the Zba sh3add, since Zba is not among the extensions declared on the
// func line below.
func ff_ps_stereo_interpolate_rvv, zve32f
        // --- Setup: per-lane coefficients for the first pass -------------
        vsetvli      t0, zero, e32, m1, ta, ma  // t0 = VLMAX for e32/m1
        vid.v        v24
        flw          ft0,   (a2)
        vadd.vi      v24, v24, 1                // v24[i] = i + 1
        flw          ft1,  4(a2)
        vfcvt.f.xu.v v24, v24                   // v24[i] = (float)(i + 1)
        flw          ft2,  8(a2)
        vfmv.v.f     v16, ft0                   // v16 = splat(h0)
        flw          ft3, 12(a2)
        vfmv.v.f     v17, ft1                   // v17 = splat(h1)
        flw          ft0,   (a3)                // ft0..ft3 now hold the steps
        vfmv.v.f     v18, ft2                   // v18 = splat(h2)
        flw          ft1,  4(a3)
        vfmv.v.f     v19, ft3                   // v19 = splat(h3)
        flw          ft2,  8(a3)
        vfmv.v.f     v20, ft0                   // v20 = splat(h0_step)
        flw          ft3, 12(a3)
        vfmv.v.f     v21, ft1                   // v21 = splat(h1_step)
        fcvt.s.wu    ft4, t0                    // (float)(vlenb / sizeof (float))
        vfmv.v.f     v22, ft2                   // v22 = splat(h2_step)
        fmul.s       ft0, ft0, ft4              // ft0 = VLMAX * h0_step
        vfmv.v.f     v23, ft3                   // v23 = splat(h3_step)
        fmul.s       ft1, ft1, ft4
        vfmacc.vv    v16, v24, v20              // h0 += (i + 1) * h0_step
        fmul.s       ft2, ft2, ft4
        vfmacc.vv    v17, v24, v21              // h1 += (i + 1) * h1_step
        fmul.s       ft3, ft3, ft4
        vfmacc.vv    v18, v24, v22              // h2 += (i + 1) * h2_step
        vfmacc.vv    v19, v24, v23              // h3 += (i + 1) * h3_step

        // --- Main loop: a4 = pairs still to process ----------------------
        // NOTE(review): the coefficients advance by VLMAX * step per pass,
        // which is only right if vsetvli grants vl == VLMAX on every pass
        // except the last one. The V specification allows a smaller vl when
        // VLMAX < AVL < 2 * VLMAX; confirm that targeted cores never do so.
1:
        vsetvli      t0, a4, e32, m1, ta, ma    // t0 = pairs handled this pass
        vlseg2e32.v  v8, (a0)                   // v8:l_re, v9:l_im
        sub          a4, a4, t0
        vlseg2e32.v  v10, (a1)                  // v10:r_re, v11:r_im
        slli         t1, t0, 3                  // t1 = bytes consumed (8 per pair)
        vfmul.vv     v12, v8, v16               // l_re * h0
        vfmul.vv     v13, v9, v16               // l_im * h0
        vfmul.vv     v14, v8, v17               // l_re * h1
        vfmul.vv     v15, v9, v17               // l_im * h1
        vfmacc.vv    v12, v10, v18              // + r_re * h2
        vfmacc.vv    v13, v11, v18              // + r_im * h2
        vfmacc.vv    v14, v10, v19              // + r_re * h3
        vfmacc.vv    v15, v11, v19              // + r_im * h3
        vsseg2e32.v  v12, (a0)                  // store new l (v12:re, v13:im)
        add          a0, a0, t1
        vsseg2e32.v  v14, (a1)                  // store new r (v14:re, v15:im)
        add          a1, a1, t1
        vfadd.vf     v16, v16, ft0              // h0 += (vlenb / sizeof (float)) * h0_step
        vfadd.vf     v17, v17, ft1
        vfadd.vf     v18, v18, ft2
        vfadd.vf     v19, v19, ft3
        bnez         a4, 1b

        ret
endfunc