lavc/vp7dsp: R-V V vp7_idct_add

Most of the code is shared with the luma DC WHT function (ff_vp7_luma_dc_wht_rvv), thanks to minor earlier changes.

vp7_idct_add_c:       5.2
vp7_idct_add_rvv_i32: 2.5
This commit is contained in:
Rémi Denis-Courmont 2024-05-26 13:22:45 +03:00
parent 4a0e629b6f
commit fa3b153cb1
2 changed files with 31 additions and 0 deletions

View File

@ -26,6 +26,7 @@
#include "libavcodec/vp8dsp.h"
/* Prototypes of the RISC-V Vector (RVV) VP7 routines; the functions
 * themselves are written in assembly (declared there with the `func`
 * macro under the same names). ff_vp7dsp_init_riscv() below stores their
 * addresses in the vp8_luma_dc_wht and vp8_idct_add members of
 * VP8DSPContext. */
void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp7_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
{
@ -37,6 +38,7 @@ av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
#if __riscv_xlen >= 64
c->vp8_luma_dc_wht = ff_vp7_luma_dc_wht_rvv;
#endif
c->vp8_idct_add = ff_vp7_idct_add_rvv;
}
#endif
}

View File

@ -97,4 +97,33 @@ func ff_vp7_luma_dc_wht_rvv, zve32x
vnclip.wi v7, v3, 18
jr t0
endfunc
/*
 * void ff_vp7_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
 *
 * Adds four vectors of 16-bit values (v4-v7) to four rows of 8-bit values
 * loaded from (a0), (t1), (t2) and (t3), clamps each sum to the unsigned
 * 8-bit range and stores the result back to the same four addresses.
 *
 * NOTE(review): v4-v7 and t1-t3 are not written in this function before
 * being read; they are presumably produced by the shared code at local
 * label 1 (the transform result rows and the addresses of dst rows 1-3
 * derived from dst/stride) - confirm against that code. The vector length
 * is likewise inherited from there (every vsetvli here keeps VL).
 */
func ff_vp7_idct_add_rvv, zve32x
/* Call the shared code at the nearest preceding local label 1, using t0 as
 * the link register; that code returns here with "jr t0". */
jal t0, 1b
/* Select fixed-point rounding mode 2 (round-down in the RVV encoding) for
 * the vnclipu instructions below. With a shift amount of 0 no bits are
 * shifted out there, so only the saturation matters. */
csrwi vxrm, 2
/* Keep VL, switch to 8-bit elements (LMUL = 1/4) for the byte loads and
 * the widening adds. */
vsetvli zero, zero, e8, mf4, ta, ma
/* Load each row of existing bytes and add it, zero-extended, to the
 * corresponding 16-bit row: vN(16-bit) += zext(row bytes). The loads are
 * interleaved with the adds, presumably to hide load latency. */
vle8.v v12, (a0)
vle8.v v13, (t1)
vwaddu.wv v4, v4, v12
vle8.v v14, (t2)
vwaddu.wv v5, v5, v13
vle8.v v15, (t3)
vwaddu.wv v6, v6, v14
vwaddu.wv v7, v7, v15
/* Operate on the 16-bit sums (LMUL = 1/2, same VL): clamp negative values
 * to 0 with a signed max against x0, so that the unsigned narrowing below
 * does not misread them as large positive numbers. */
vsetvli zero, zero, e16, mf2, ta, ma
vmax.vx v4, v4, zero
vmax.vx v5, v5, zero
vmax.vx v6, v6, zero
vmax.vx v7, v7, zero
/* Back to 8-bit elements: narrow 16 -> 8 bits with unsigned saturation
 * (upper clamp at 255) and store each row to the address it was loaded
 * from. Stores are interleaved with the narrowing operations. */
vsetvli zero, zero, e8, mf4, ta, ma
vnclipu.wi v0, v4, 0
vnclipu.wi v1, v5, 0
vse8.v v0, (a0)
vnclipu.wi v2, v6, 0
vse8.v v1, (t1)
vnclipu.wi v3, v7, 0
vse8.v v2, (t2)
vse8.v v3, (t3)
ret
endfunc
#endif