lavc/vp8dsp: R-V V put_bilin_hv

C908:
vp8_put_bilin4_hv_c: 561.0
vp8_put_bilin4_hv_rvv_i32: 232.7
vp8_put_bilin8_hv_c: 2162.7
vp8_put_bilin8_hv_rvv_i32: 506.7
vp8_put_bilin16_hv_c: 4769.7
vp8_put_bilin16_hv_rvv_i32: 556.7

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-05-08 00:54:06 +08:00 committed by Rémi Denis-Courmont
parent bb5039b3cb
commit 538f217bbb
2 changed files with 39 additions and 0 deletions

View File

@ -67,6 +67,19 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
c->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_rvv;
c->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_rvv;
c->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_rvv;
c->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_rvv;
c->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_rvv;
c->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_rvv;
c->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_rvv;
c->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_rvv;
c->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_rvv;
c->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_rvv;
c->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_rvv;
c->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_rvv;
c->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_rvv;
c->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_rvv;
c->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_rvv;
}
#endif
#endif

View File

@ -116,7 +116,33 @@ func ff_put_vp8_bilin\len\()_\type\()_rvv, zve32x
endfunc
.endm
/*
 * put_vp8_bilin_hv len
 *
 * Emits ff_put_vp8_bilin<len>_hv_rvv: each stored row is a weighted blend
 * of two consecutive results of "bilin_load ..., h, a5", rounded and
 * shifted right by 3.
 *
 * Register use visible in this macro:
 *   a0  store pointer, advanced by a1 after every row
 *   a2  load pointer, advanced by a3 after every bilin_load
 *   a4  row counter, decremented once per iteration; the loop body always
 *       runs at least once because the test is at the bottom
 *   a5  weight handed to bilin_load
 *   a6  weight of the newer row in the blend
 *   t1  8 - a5 (not read directly in this macro)
 *   t2  8 - a6, weight of the older row in the blend
 *   t4  constant 4, the rounding bias added before the shift by 3
 *   v4  row produced by bilin_load; v20/v24 widened temporaries; v0 output
 *
 * NOTE(review): a0..a6 presumably map to (dst, dststride, src, srcstride,
 * h, mx, my) of the VP8 motion-compensation prototype; confirm against the
 * C declaration.
 * NOTE(review): vsetvlstatic8 and bilin_load are defined outside this view.
 * bilin_load presumably reads a2, t1 and t4 and may clobber further
 * registers; confirm before reordering anything around it.
 */
.macro put_vp8_bilin_hv len
func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
vsetvlstatic8 \len
li t3, 8
sub t1, t3, a5
sub t2, t3, a6
li t4, 4
/* Prime v4 with the first filtered row, then step to the next source row. */
bilin_load v4, \len, h, a5
add a2, a2, a3
1:
addi a4, a4, -1
/* v20 = previous row * (8 - a6), widened to twice the element width. */
vwmulu.vx v20, v4, t2
/* Replace v4 with the next filtered row; the product above is already taken. */
bilin_load v4, \len, h, a5
/* v20 += a6 * new row. */
vwmaccu.vx v20, a6, v4
/* v0 = (v20 + 4) >> 3, narrowed back to the original element width. */
vwaddu.wx v24, v20, t4
vnsra.wi v0, v24, 3
vse8.v v0, (a0)
add a2, a2, a3
add a0, a0, a1
bnez a4, 1b
ret
endfunc
.endm
/*
 * Expand the bilinear put macros once per block width (16, 8, then 4).
 * For every width this invokes put_vp8_bilin_h_v twice, as type h with a5
 * and as type v with a6 for the third macro argument, followed by
 * put_vp8_bilin_hv.
 */
.irp len, 16, 8, 4
put_vp8_bilin_h_v \len, h, a5
put_vp8_bilin_h_v \len, v, a6
put_vp8_bilin_hv \len
.endr