lavc/jpeg2000dsp: R-V V rct_int

jpeg2000_rct_int_c:       2592.2
jpeg2000_rct_int_rvv_i32: 1154.2
This commit is contained in:
Rémi Denis-Courmont 2023-10-28 21:55:16 +03:00
parent 73dea2bb91
commit 28840cf499
2 changed files with 29 additions and 2 deletions

View File

@ -24,13 +24,17 @@
#include "libavcodec/jpeg2000dsp.h"
void ff_ict_float_rvv(void *src0, void *src1, void *src2, int csize);
void ff_rct_int_rvv(void *src0, void *src1, void *src2, int csize);
/**
 * Select RISC-V vector implementations of the JPEG 2000 multiple-component
 * transform decoders, based on the CPU flags reported at run time.
 *
 * Entries of c->mct_decode that are not overridden here are left untouched.
 * When HAVE_RVV is not set, the function does nothing.
 *
 * @param c DSP context whose mct_decode[] table is updated in place
 */
av_cold void ff_jpeg2000dsp_init_riscv(Jpeg2000DSPContext *c)
{
#if HAVE_RVV
    int flags = av_get_cpu_flags();

    /* Neither vector routine is selected unless AV_CPU_FLAG_RVB_ADDR is set,
     * so test it once up front instead of repeating it per routine. */
    if (flags & AV_CPU_FLAG_RVB_ADDR) {
        /* Floating-point transform: needs 32-bit float vector support. */
        if (flags & AV_CPU_FLAG_RVV_F32)
            c->mct_decode[FF_DWT97] = ff_ict_float_rvv;

        /* Integer transform: needs 32-bit integer vector support. */
        if (flags & AV_CPU_FLAG_RVV_I32)
            c->mct_decode[FF_DWT53] = ff_rct_int_rvv;
    }
#endif
}

View File

@ -47,3 +47,26 @@ func ff_ict_float_rvv, zve32f
ret
endfunc
// void ff_rct_int_rvv(void *src0, void *src1, void *src2, int csize)
//
// In-place integer component transform over three arrays of 32-bit elements.
// For every index i in [0, csize):
//     t       = src0[i] - ((src1[i] + src2[i]) >> 2)   (arithmetic shift)
//     src0[i] = t + src2[i]
//     src1[i] = t
//     src2[i] = t + src1[i]                            (original src1[i])
//
// Register roles (arguments in a0-a3, assuming the standard RISC-V calling
// convention for the prototype above):
//     a0, a1, a2 = running pointers into src0, src1, src2
//     a3         = number of elements still to process
//     t0         = number of elements handled by the current iteration
//     v0, v8, v16, v24 = the four LMUL=8 vector register groups (all used)
//
// NOTE(review): the second argument of the `func` macro presumably names the
// vector extension the routine is assembled for; the macro is defined
// elsewhere - confirm. `sh2add` is a Zba instruction, which is presumably why
// the C init code also checks AV_CPU_FLAG_RVB_ADDR - confirm.
func ff_rct_int_rvv, zve32x
1:
vsetvli t0, a3, e32, m8, ta, ma // t0 = elements this pass (32-bit, groups of 8 regs)
vle32.v v16, (a1) // v16 = src1 chunk
sub a3, a3, t0 // account for the elements about to be processed
vle32.v v24, (a2) // v24 = src2 chunk
vle32.v v8, (a0) // v8 = src0 chunk
vadd.vv v0, v16, v24 // v0 = src1 + src2
vsra.vi v0, v0, 2 // v0 = (src1 + src2) >> 2, sign-preserving
vsub.vv v0, v8, v0 // v0 = t = src0 - ((src1 + src2) >> 2)
vadd.vv v8, v0, v24 // v8 = t + src2 -> new src0
vadd.vv v24, v0, v16 // v24 = t + src1 -> new src2 (v16 still holds old src1)
vse32.v v8, (a0) // store new src0
sh2add a0, t0, a0 // a0 += 4 * t0 (advance by t0 32-bit elements)
vse32.v v0, (a1) // store new src1 (= t)
sh2add a1, t0, a1 // a1 += 4 * t0
vse32.v v24, (a2) // store new src2
sh2add a2, t0, a2 // a2 += 4 * t0
bnez a3, 1b // loop until no elements remain
ret
endfunc