sws/input: R-V V 32-bit RGB to halved UV

T-Head C908:
abgr_to_uv_half_8_c:            2.2
abgr_to_uv_half_8_rvv_i32:      3.5
abgr_to_uv_half_128_c:         44.0
abgr_to_uv_half_128_rvv_i32:   13.0
abgr_to_uv_half_1080_c:       245.0
abgr_to_uv_half_1080_rvv_i32: 107.2
abgr_to_uv_half_1920_c:       406.2
abgr_to_uv_half_1920_rvv_i32: 188.7
bgra_to_uv_half_8_c:            2.2
bgra_to_uv_half_8_rvv_i32:      3.5
bgra_to_uv_half_128_c:         26.5
bgra_to_uv_half_128_rvv_i32:   13.0
bgra_to_uv_half_1080_c:       219.7
bgra_to_uv_half_1080_rvv_i32: 107.0
bgra_to_uv_half_1920_c:       406.7
bgra_to_uv_half_1920_rvv_i32: 188.7

SpacemiT X60:
abgr_to_uv_half_8_c:           2.2
abgr_to_uv_half_8_rvv_i32:     3.0
abgr_to_uv_half_128_c:        28.2
abgr_to_uv_half_128_rvv_i32:   5.7
abgr_to_uv_half_1080_c:      235.5
abgr_to_uv_half_1080_rvv_i32: 47.7
abgr_to_uv_half_1920_c:      418.2
abgr_to_uv_half_1920_rvv_i32: 84.0
bgra_to_uv_half_8_c:           2.0
bgra_to_uv_half_8_rvv_i32:     3.0
bgra_to_uv_half_128_c:        23.7
bgra_to_uv_half_128_rvv_i32:   5.7
bgra_to_uv_half_1080_c:      195.5
bgra_to_uv_half_1080_rvv_i32: 47.7
bgra_to_uv_half_1920_c:      346.5
bgra_to_uv_half_1920_rvv_i32: 84.0
This commit is contained in:
Rémi Denis-Courmont 2024-06-06 21:15:08 +03:00
parent e2f069905e
commit 7a3369398f
2 changed files with 73 additions and 4 deletions

View File

@ -242,6 +242,67 @@ func ff_\chr0\()ToUV_rvv, zve32x
ret
endfunc
/*
 * Packed 32-bit RGB to U/V conversion with pairs of horizontally adjacent
 * pixels summed before the colour transform (RISC-V Vector).
 *
 * Two entry points share a single loop. They differ only in which table
 * coefficients are paired with the first and third extracted byte:
 *   chr1 entry: first byte uses BU/BV, third byte uses RU/RV
 *   chr0 entry: first byte uses RU/RV, third byte uses BU/BV
 * The chr1 entry loads its four coefficients, then jumps to label 1 inside
 * the chr0 function, so the two functions must stay adjacent and in order.
 *
 * Registers, as used by the code below:
 *   a0  output pointer for results built from the U coefficients (16-bit)
 *   a1  output pointer for results built from the V coefficients (16-bit)
 *   a3  input pointer, read as pairs of 32-bit words
 *   a5  remaining number of output elements per output array
 *   a6  on entry: table of 32-bit coefficients (byte offsets 12..32 read);
 *       afterwards reused as the 0xff byte mask
 *   a7  overwritten with the rounding/offset constant
 *   t0  elements processed in the current iteration
 *   t1..t3  multipliers for the a0 output, t4..t6 for the a1 output
 * NOTE(review): the mapping of a0..a7 onto the C prototype of the caller is
 * not visible here; presumably a2 and a4 are unused arguments. Confirm.
 */
func ff_\chr1\()ToUV_half_rvv, zve32x
lw t1, 20(a6) # BU
lw t4, 32(a6) # BV
lw t3, 12(a6) # RU
lw t6, 24(a6) # RV
j 1f # continue in the shared body of the chr0 function
endfunc
func ff_\chr0\()ToUV_half_rvv, zve32x
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
lw t6, 32(a6) # BV
1:
lw t2, 16(a6) # GU
lw t5, 28(a6) # GV
li a6, 0xff # table pointer no longer needed, a6 becomes the byte mask
li a7, (256 << 15) + (1 << (15 - 6)) # offset plus rounding term added before the final shift
2:
vsetvli t0, a5, e32, m4, ta, ma # t0 = elements handled in this pass
vlseg2e32.v v0, (a3) # de-interleave: even words to v0-v3, odd words to v4-v7
sub a5, a5, t0 # elements still to do
.if \high
vsrl.vi v8, v0, 24 # first component is the top byte (even pixels)
vsrl.vi v12, v4, 24 # same for odd pixels
.else
vand.vx v8, v0, a6 # first component is the bottom byte (even pixels)
vand.vx v12, v4, a6 # same for odd pixels
.endif
sh3add a3, t0, a3 # advance input by t0 pairs of 32-bit words (8 bytes each)
vsrl.vi v16, v0, 8 * (1 + \high) # bring second component down to bits 7:0
vsrl.vi v20, v4, 8 * (1 + \high)
vsrl.vi v24, v0, 8 * (2 - \high) # bring third component down to bits 7:0
vsrl.vi v28, v4, 8 * (2 - \high)
vand.vx v16, v16, a6 # keep only the low byte of each shifted word
vand.vx v20, v20, a6
vand.vx v24, v24, a6
vand.vx v28, v28, a6
vadd.vv v8, v8, v12 # sum even and odd pixel, first component
vadd.vv v16, v16, v20 # sum even and odd pixel, second component
vadd.vv v24, v24, v28 # sum even and odd pixel, third component
vmul.vx v0, v8, t1 # v0 = first * t1
vmul.vx v4, v8, t4 # v4 = first * t4
vmacc.vx v0, t2, v16 # v0 += second * GU
vmacc.vx v4, t5, v16 # v4 += second * GV
vmacc.vx v0, t3, v24 # v0 += third * t3
vmacc.vx v4, t6, v24 # v4 += third * t6
vadd.vx v0, v0, a7 # add offset and rounding
vadd.vx v4, v4, a7
vsetvli zero, zero, e16, m2, ta, ma # same element count, now 16-bit elements
vnsra.wi v0, v0, 15 - 5 # narrow 32-bit sums in v0-v3 to 16 bits in v0-v1
vnsra.wi v2, v4, 15 - 5 # narrow 32-bit sums in v4-v7 to 16 bits in v2-v3
vse16.v v0, (a0) # store results built from t1..t3
sh1add a0, t0, a0 # advance by t0 16-bit elements
vse16.v v2, (a1) # store results built from t4..t6
sh1add a1, t0, a1 # advance by t0 16-bit elements
bnez a5, 2b # loop until no elements remain
ret
endfunc
.endm
rgba_input rgba32, bgra32, 0

View File

@ -47,13 +47,17 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
switch (c->srcFormat) {
case AV_PIX_FMT_ABGR:
c->lumToYV12 = ff_abgr32ToY_rvv;
if (!c->chrSrcHSubSample)
if (c->chrSrcHSubSample)
c->chrToYV12 = ff_abgr32ToUV_half_rvv;
else
c->chrToYV12 = ff_abgr32ToUV_rvv;
break;
case AV_PIX_FMT_ARGB:
c->lumToYV12 = ff_argb32ToY_rvv;
if (!c->chrSrcHSubSample)
if (c->chrSrcHSubSample)
c->chrToYV12 = ff_argb32ToUV_half_rvv;
else
c->chrToYV12 = ff_argb32ToUV_rvv;
break;
@ -67,7 +71,9 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
case AV_PIX_FMT_BGRA:
c->lumToYV12 = ff_bgra32ToY_rvv;
if (!c->chrSrcHSubSample)
if (c->chrSrcHSubSample)
c->chrToYV12 = ff_bgra32ToUV_half_rvv;
else
c->chrToYV12 = ff_bgra32ToUV_rvv;
break;
@ -81,7 +87,9 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
case AV_PIX_FMT_RGBA:
c->lumToYV12 = ff_rgba32ToY_rvv;
if (!c->chrSrcHSubSample)
if (c->chrSrcHSubSample)
c->chrToYV12 = ff_rgba32ToUV_half_rvv;
else
c->chrToYV12 = ff_rgba32ToUV_rvv;
break;
}