swscale/rgb2rgb2: rework RISC-V V shuffle_bytes_{1230,3012}

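This avoids strided loads: each 32-bit word is fetched with a unit-stride load, and its bytes are then rotated into place with a left shift, a right shift and an OR.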

Before:
shuffle_bytes_1230_rvv_i32: 308.7
shuffle_bytes_3012_rvv_i32: 308.7

After:
shuffle_bytes_1230_rvv_i32: 46.7
shuffle_bytes_3012_rvv_i32: 46.7
This commit is contained in:
Rémi Denis-Courmont 2023-07-18 20:25:13 +03:00
parent 15982554e6
commit c2b38619c0
1 changed file with 20 additions and 10 deletions

View File

@ -49,19 +49,29 @@ func ff_shuffle_bytes_2103_rvv, zve32x
endfunc endfunc
func ff_shuffle_bytes_1230_rvv, zve32x func ff_shuffle_bytes_1230_rvv, zve32x
addi t1, a0, 2 li t1, 24
addi t2, a0, 3 li t2, 8
addi t3, a0, 0 j 3f
addi a0, a0, 1
j 1b
endfunc endfunc
func ff_shuffle_bytes_3012_rvv, zve32x func ff_shuffle_bytes_3012_rvv, zve32x
addi t1, a0, 0 li t1, 8
addi t2, a0, 1 li t2, 24
addi t3, a0, 2 3:
addi a0, a0, 3 srai a2, a2, 2
j 1b 4:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a0)
sub a2, a2, t0
vsll.vx v16, v8, t1
sh2add a0, t0, a0
vsrl.vx v8, v8, t2
vor.vv v16, v16, v8
vse32.v v16, (a1)
sh2add a1, t0, a1
bnez a2, 4b
ret
endfunc endfunc
func ff_shuffle_bytes_3210_rvv, zve32x func ff_shuffle_bytes_3210_rvv, zve32x