sws/input: R-V V rgb24ToUV and bgr24ToUV

T-Head C908:
rgb24_to_uv_8_c:            2.7
rgb24_to_uv_8_rvv_i32:      3.2
rgb24_to_uv_128_c:         41.0
rgb24_to_uv_128_rvv_i32:   12.7
rgb24_to_uv_1080_c:       342.5
rgb24_to_uv_1080_rvv_i32: 105.7
rgb24_to_uv_1280_c:       406.0
rgb24_to_uv_1280_rvv_i32: 124.2
rgb24_to_uv_1920_c:       626.0
rgb24_to_uv_1920_rvv_i32: 186.0

SpacemiT X60:
rgb24_to_uv_8_c:            2.5
rgb24_to_uv_8_rvv_i32:      3.0
rgb24_to_uv_128_c:         36.5
rgb24_to_uv_128_rvv_i32:    5.7
rgb24_to_uv_1080_c:       304.2
rgb24_to_uv_1080_rvv_i32:  49.0
rgb24_to_uv_1280_c:       360.5
rgb24_to_uv_1280_rvv_i32:  57.5
rgb24_to_uv_1920_c:       540.7
rgb24_to_uv_1920_rvv_i32:  86.2
This commit is contained in:
Rémi Denis-Courmont 2024-06-05 18:36:42 +03:00
parent 79dfdac4db
commit 3ef5867e4b
2 changed files with 54 additions and 0 deletions

View File

@ -53,3 +53,49 @@ func ff_rgb24ToY_rvv, zve32x
ret ret
endfunc endfunc
/*
 * BGR24 entry point of the packed 24-bit to U/V conversion.
 *
 * Only the coefficient set-up happens here: the blue coefficients are
 * loaded into t1/t4 and the red coefficients into t3/t6, which is the
 * opposite assignment to the one made at the top of ff_rgb24ToUV_rvv.
 * Control then jumps forward to local label 1, which sits inside
 * ff_rgb24ToUV_rvv, so both entry points share the green coefficient
 * loads and the whole conversion loop.
 *
 * a6 is the base address of the coefficient table (the coeffs argument
 * of the C prototype, assuming the standard RISC-V argument registers).
 * The RU/BU/RV/BV names are the labels used for the table offsets.
 */
func ff_bgr24ToUV_rvv, zve32x
lw t1, 20(a6) # BU: U weight applied to the first byte of each pixel
lw t4, 32(a6) # BV: V weight applied to the first byte of each pixel
lw t3, 12(a6) # RU: U weight applied to the third byte of each pixel
lw t6, 24(a6) # RV: V weight applied to the third byte of each pixel
j 1f # continue at label 1 inside ff_rgb24ToUV_rvv
endfunc
/*
 * Converts packed 3-byte pixels into two planes of 16-bit chroma samples.
 * This is the RGB24 entry point; ff_bgr24ToUV_rvv enters at label 1 with
 * the red and blue coefficients swapped between t1/t4 and t3/t6.
 *
 * Registers as used by the code below (argument mapping assumes the
 * standard RISC-V calling convention; a2 and a4 are never read):
 *   a0  first output pointer, receives the U results, 2 bytes per pixel
 *   a1  second output pointer, receives the V results, 2 bytes per pixel
 *   a3  packed source pointer, advanced by 3 bytes per pixel
 *   a5  number of pixels still to convert
 *   a6  coefficient table base on entry; reused as scratch in the loop
 *   a7  overwritten with the bias and rounding constant
 *
 * For each pixel with bytes b0, b1, b2 the loop computes
 *   U = (t1 * b0 + t2 * b1 + t3 * b2 + a7) >> 9
 *   V = (t4 * b0 + t5 * b1 + t6 * b2 + a7) >> 9
 * in 32-bit lanes and stores the low 16 bits of each result.
 *
 * The scalar pointer and counter updates are interleaved with the vector
 * instructions; keep the order as is unless re-benchmarking.
 */
func ff_rgb24ToUV_rvv, zve32x
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
lw t6, 32(a6) # BV
1:
/* Shared by both entry points from here on. */
lw t2, 16(a6) # GU
lw t5, 28(a6) # GV
/*
 * 256 << 14 becomes 8192 after the final shift right by 9 (an offset
 * added to every output); 1 << 8 is half of 1 << 9, so the shift rounds
 * to nearest instead of truncating.
 */
li a7, (256 << (15 - 1)) + (1 << (15 - 7))
2:
vsetvli t0, a5, e32, m8, ta, ma # t0 = pixels handled in this pass
vlseg3e8.v v0, (a3) # de-interleave: byte 0, 1, 2 of each pixel, read below from v0, v2, v4
sub a5, a5, t0 # pixels remaining after this pass
vzext.vf4 v16, v0 # byte 0 zero-extended to 32 bits
sh1add a6, t0, t0 # a6 = 3 * t0, source bytes consumed
vzext.vf4 v24, v2 # byte 1 (green) zero-extended to 32 bits
vmul.vx v8, v16, t1 # U sum = byte 0 * t1
add a3, a6, a3 # advance source pointer
vmul.vx v16, v16, t4 # V sum = byte 0 * t4, computed in place
vmacc.vx v8, t2, v24 # U sum += GU * green
vmacc.vx v16, t5, v24 # V sum += GV * green
vzext.vf4 v24, v4 # byte 2 zero-extended, reusing the green scratch group
vadd.vx v8, v8, a7 # add bias and rounding to U sum
vadd.vx v16, v16, a7 # add bias and rounding to V sum
vmacc.vx v8, t3, v24 # U sum += t3 * byte 2
vmacc.vx v16, t6, v24 # V sum += t6 * byte 2
vsetvli zero, zero, e16, m4, ta, ma # keep vl, switch to 16-bit elements for narrowing and stores
vnsra.wi v0, v8, 15 - 6 # U = U sum >> 9 (arithmetic), narrowed to 16 bits
vnsra.wi v4, v16, 15 - 6 # V = V sum >> 9 (arithmetic), narrowed to 16 bits
vse16.v v0, (a0) # store U samples
sh1add a0, t0, a0 # a0 += 2 * t0
vse16.v v4, (a1) # store V samples
sh1add a1, t0, a1 # a1 += 2 * t0
bnez a5, 2b # loop while pixels remain
ret
endfunc

View File

@ -23,8 +23,12 @@
void ff_bgr24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *, void ff_bgr24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *,
const uint8_t *, int width, uint32_t *coeffs, void *); const uint8_t *, int width, uint32_t *coeffs, void *);
/*
 * RISC-V Vector assembly routine: packed BGR24 to unsubsampled U and V.
 *
 * Arguments in order, as the assembly uses them (assuming the standard
 * RISC-V argument registers):
 *   1. U output; written as 16-bit samples, one per pixel
 *   2. V output; written as 16-bit samples, one per pixel
 *   3. not read by the assembly
 *   4. packed source, 3 bytes per pixel
 *   5. not read by the assembly
 *   6. width: number of pixels to convert
 *   7. coeffs: table of 32-bit coefficients, read at byte offsets 12 to 32
 *   8. not read by the assembly
 */
void ff_bgr24ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, const uint8_t *,
const uint8_t *, int width, uint32_t *coeffs, void *);
void ff_rgb24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *, void ff_rgb24ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *,
const uint8_t *, int width, uint32_t *coeffs, void *); const uint8_t *, int width, uint32_t *coeffs, void *);
/*
 * RISC-V Vector assembly routine: packed RGB24 to unsubsampled U and V.
 *
 * Arguments in order, as the assembly uses them (assuming the standard
 * RISC-V argument registers):
 *   1. U output; written as 16-bit samples, one per pixel
 *   2. V output; written as 16-bit samples, one per pixel
 *   3. not read by the assembly
 *   4. packed source, 3 bytes per pixel
 *   5. not read by the assembly
 *   6. width: number of pixels to convert
 *   7. coeffs: table of 32-bit coefficients, read at byte offsets 12 to 32
 *   8. not read by the assembly
 */
void ff_rgb24ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, const uint8_t *,
const uint8_t *, int width, uint32_t *coeffs, void *);
av_cold void ff_sws_init_swscale_riscv(SwsContext *c) av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
{ {
@ -35,10 +39,14 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
switch (c->srcFormat) { switch (c->srcFormat) {
case AV_PIX_FMT_BGR24: case AV_PIX_FMT_BGR24:
c->lumToYV12 = ff_bgr24ToY_rvv; c->lumToYV12 = ff_bgr24ToY_rvv;
if (!c->chrSrcHSubSample)
c->chrToYV12 = ff_bgr24ToUV_rvv;
break; break;
case AV_PIX_FMT_RGB24: case AV_PIX_FMT_RGB24:
c->lumToYV12 = ff_rgb24ToY_rvv; c->lumToYV12 = ff_rgb24ToY_rvv;
if (!c->chrSrcHSubSample)
c->chrToYV12 = ff_rgb24ToUV_rvv;
break; break;
} }
} }