lavc/vp8dsp: R-V V vp8_idct_dc_add4y

c908:
vp8_idct_dc_add4y_c: 368.5
vp8_idct_dc_add4y_rvv_i32: 134.5

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-02-02 12:50:07 +08:00 committed by Rémi Denis-Courmont
parent c12053cefc
commit e74e18cae4
2 changed files with 18 additions and 0 deletions

View File

@ -26,6 +26,7 @@
#include "libavcodec/vp8dsp.h"
/* RVV routines implemented in assembly; the 4y variant takes four
 * int16_t[16] coefficient blocks instead of one. */
void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
{
@ -34,6 +35,7 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
}
#endif
}

View File

@ -36,9 +36,25 @@
vsse32.v v0, (a0), a2
.endm
// Process one block with vp8_idct_dc_add, then advance both pointers to the
// next block: a0 moves forward by 4 bytes and a1 by 32 bytes, which is the
// size of one int16_t[16] entry of block[4][16].
// Assumes the standard argument registers (a0 = dst, a1 = block, a2 = stride).
.macro vp8_idct_dc_addy
vp8_idct_dc_add
addi a0, a0, 4
addi a1, a1, 32
.endm
// void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
// Single-block entry point: configures the vector unit, expands
// vp8_idct_dc_add once, and returns.
func ff_vp8_idct_dc_add_rvv, zve32x
// vl = 4, SEW = 8 bits, LMUL = 1/4, tail-agnostic, mask-agnostic
vsetivli zero, 4, e8, mf4, ta, ma
vp8_idct_dc_add
ret
endfunc
// void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
// Four-block entry point: runs the single-block DC add on four consecutive
// blocks, stepping dst by 4 bytes and block by 32 bytes between them.
func ff_vp8_idct_dc_add4y_rvv, zve32x
// vl = 4, SEW = 8 bits, LMUL = 1/4, tail-agnostic, mask-agnostic
// NOTE(review): this is set only once, so every later expansion relies on
// vp8_idct_dc_add leaving vl/vtype in this state on exit; the macro body is
// not fully visible here, so confirm.
vsetivli zero, 4, e8, mf4, ta, ma
// First three blocks: process, then advance a0/a1 to the next block.
.rept 3
vp8_idct_dc_addy
.endr
// Last block: the pointers are not needed afterwards, so skip the advance.
vp8_idct_dc_add
ret
endfunc