lavc/rv34dsp: R-V V rv34_idct_dc_add

C908:
rv34_idct_dc_add_c: 134.7
rv34_idct_dc_add_rvv_i32: 45.5

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-01-31 19:04:11 +08:00 committed by Rémi Denis-Courmont
parent d133e52790
commit 89189dd9e7
2 changed files with 22 additions and 0 deletions

View File

@ -26,6 +26,7 @@
#include "libavcodec/rv34dsp.h"
void ff_rv34_inv_transform_dc_rvv(int16_t *block);
void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc);
av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c)
{
@ -34,6 +35,7 @@ av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c)
if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv;
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv;
}
#endif
}

View File

@ -31,3 +31,23 @@ func ff_rv34_inv_transform_dc_rvv, zve32x
ret
endfunc
/*
 * void ff_rv34_idct_dc_add_rvv(uint8_t *dst, ptrdiff_t stride, int dc)
 *
 * Adds the scaled DC term ((169 * dc + 512) >> 10) to every byte of a
 * 4x4 block (4 rows of 4 bytes, rows `stride` bytes apart) and writes the
 * result back in place, saturated to the 0..255 range.
 *
 * In:       a0 = dst, a1 = stride (bytes), a2 = dc
 * Clobbers: t1, v0, v2-v3, vl, vtype
 * Requires: 16 e16 elements must fit in an m2 register group, i.e.
 *           VLEN >= 128 bits (the init code checks vlenb >= 16).
 */
func ff_rv34_idct_dc_add_rvv, zve32x
        /* Load each 4-byte row as a single 32-bit element, a1 bytes apart. */
        vsetivli      zero, 4, e8, mf4, ta, ma
        vlse32.v      v0, (a0), a1

        /* t1 = (169 * dc + 512) >> 10 */
        li            t1, 169
        mul           t1, t1, a2
        addi          t1, t1, 512
        srai          t1, t1, 10

        /* Widen the 16 bytes to 16 bits so that adding a signed DC term
         * does not wrap at 8 bits. */
        vsetivli      zero, 4*4, e16, m2, ta, ma
        vzext.vf2     v2, v0
        vadd.vx       v2, v2, t1

        /* Lower clamp: negative sums become 0 (signed max against x0). */
        vmax.vx       v2, v2, zero

        /* Upper clamp: narrowing to 8 bits with unsigned saturation caps
         * the values at 255, so no explicit 255 constant is needed. */
        vsetvli       zero, zero, e8, m1, ta, ma
        vnclipu.wi    v0, v2, 0

        /* Store the 4 rows back with the same row-per-element layout. */
        vsetivli      zero, 4, e8, mf4, ta, ma
        vsse32.v      v0, (a0), a1
        ret
endfunc