sws/input: R-V V 32-bit RGB to Y

T-Head C908:
abgr_to_y_8_c:            2.5
abgr_to_y_8_rvv_i32:      2.2
abgr_to_y_128_c:         37.0
abgr_to_y_128_rvv_i32:    8.5
abgr_to_y_1080_c:       327.0
abgr_to_y_1080_rvv_i32:  69.5
abgr_to_y_1920_c:       552.0
abgr_to_y_1920_rvv_i32: 122.2
bgra_to_y_8_c:            2.5
bgra_to_y_8_rvv_i32:      2.2
bgra_to_y_128_c:         37.2
bgra_to_y_128_rvv_i32:    8.5
bgra_to_y_1080_c:       310.2
bgra_to_y_1080_rvv_i32:  69.5
bgra_to_y_1920_c:       568.2
bgra_to_y_1920_rvv_i32: 122.5

SpacemiT X60:
abgr_to_y_8_c:            2.5
abgr_to_y_8_rvv_i32:      2.0
abgr_to_y_128_c:         33.0
abgr_to_y_128_rvv_i32:    3.7
abgr_to_y_1080_c:       276.0
abgr_to_y_1080_rvv_i32:  31.5
abgr_to_y_1920_c:       493.7
abgr_to_y_1920_rvv_i32:  55.5
bgra_to_y_8_c:            2.2
bgra_to_y_8_rvv_i32:      2.0
bgra_to_y_128_c:         33.0
bgra_to_y_128_rvv_i32:    3.7
bgra_to_y_1080_c:       276.0
bgra_to_y_1080_rvv_i32:  31.5
bgra_to_y_1920_c:       490.7
bgra_to_y_1920_rvv_i32:  55.5
This commit is contained in:
Rémi Denis-Courmont 2024-06-06 17:49:21 +03:00
parent 8b62fb231a
commit f5555cb106
2 changed files with 77 additions and 14 deletions

View File

@ -149,3 +149,48 @@ func ff_rgb24ToUV_half_rvv, zve32x
ret
endfunc
/*
 * Luma (Y) conversion for packed 32-bit RGB input, two entry points per
 * macro expansion sharing one loop.
 *
 * Macro arguments:
 *   chr0  format name of the entry point that pairs the coefficient at
 *         offset 0 (labelled RY) with the first extracted byte and the
 *         coefficient at offset 8 (labelled BY) with the last one
 *   chr1  format name of the entry point with those two coefficients swapped
 *   high  0: the weighted bytes are bits 0-7, 8-15 and 16-23 of each word
 *         1: the weighted bytes are bits 24-31, 16-23 and 8-15 of each word
 *
 * Registers, assuming the standard RISC-V argument registers for the C
 * prototype of the ToY functions: a0 = dst, a1 = src, a4 = width,
 * a5 = coeffs. The loop reads 32-bit elements from a1 and writes 16-bit
 * elements to a0. t0-t5 and v0-v31 are overwritten.
 */
.macro rgba_input chr0, chr1, high
/* chr1 entry: load the outer coefficients swapped, then join the body below. */
func ff_\chr1\()ToY_rvv, zve32x
lw t1, 8(a5) # BY
lw t3, 0(a5) # RY
j 1f
endfunc
/* chr0 entry, followed by the body shared by both entry points. */
func ff_\chr0\()ToY_rvv, zve32x
lw t1, 0(a5) # RY
lw t3, 8(a5) # BY
1:
lw t2, 4(a5) # GY
/*
 * Bias added to every weighted sum before the final shift right by 9:
 * 1 << 8 is half of 1 << 9, so the shift rounds to nearest, and 32 << 14
 * contributes a constant 1024 to each result.
 */
li t4, (32 << (15 - 1)) + (1 << (15 - 7))
li t5, 0xff # mask keeping one byte per element
2:
vsetvli t0, a4, e32, m8, ta, ma # t0 = number of pixels in this pass
vle32.v v0, (a1) # load t0 pixels as 32-bit elements
sub a4, a4, t0 # pixels left after this pass
/* v8 = byte weighted by t1: top byte if high is set, bottom byte otherwise. */
.if \high
vsrl.vi v8, v0, 24
.else
vand.vx v8, v0, t5
.endif
sh2add a1, t0, a1 # src += 4 * t0 bytes
/* v16 = byte weighted by t2: shift by 8 (high = 0) or 16 (high = 1), then mask. */
vsrl.vi v16, v0, 8 * (1 + \high)
vmul.vx v24, v8, t1 # v24 = v8 * t1
vand.vx v16, v16, t5
/* v8 = byte weighted by t3: shift by 16 (high = 0) or 8 (high = 1), then mask. */
vsrl.vi v8, v0, 8 * (2 - \high)
vmacc.vx v24, t2, v16 # v24 += t2 * v16
vand.vx v8, v8, t5
vadd.vx v24, v24, t4 # v24 += bias
vmacc.vx v24, t3, v8 # v24 += t3 * v8
vsetvli zero, zero, e16, m4, ta, ma # keep vl, switch to 16-bit elements
vnsra.wi v0, v24, 15 - 6 # narrow to 16 bits with arithmetic shift right by 9
vse16.v v0, (a0) # store t0 16-bit results
sh1add a0, t0, a0 # dst += 2 * t0 bytes
bnez a4, 2b # loop until no pixels are left
ret
endfunc
.endm
/* Weighted bytes in bits 0-23: rgba32 puts the offset-0 weight on bits 0-7, bgra32 on bits 16-23. */
rgba_input rgba32, bgra32, 0
/* Weighted bytes in bits 8-31: abgr32 puts the offset-0 weight on bits 24-31, argb32 on bits 8-15. */
rgba_input abgr32, argb32, 1

View File

@ -21,20 +21,22 @@
#include "libavutil/riscv/cpu.h"
#include "libswscale/swscale_internal.h"
/*
 * Prototypes of the RVV input converters for packed 24-bit formats.
 * Each format has a Y, a UV and a UV_half entry point. Parameters that
 * carry no name here are deliberately left unnamed.
 */

/* BGR24 */
void ff_bgr24ToY_rvv(uint8_t *dst,
                     const uint8_t *src,
                     const uint8_t *,
                     const uint8_t *,
                     int width,
                     uint32_t *coeffs,
                     void *);
void ff_bgr24ToUV_rvv(uint8_t *,
                      uint8_t *,
                      const uint8_t *,
                      const uint8_t *,
                      const uint8_t *,
                      int width,
                      uint32_t *coeffs,
                      void *);
void ff_bgr24ToUV_half_rvv(uint8_t *,
                           uint8_t *,
                           const uint8_t *,
                           const uint8_t *,
                           const uint8_t *,
                           int width,
                           uint32_t *coeffs,
                           void *);

/* RGB24 */
void ff_rgb24ToY_rvv(uint8_t *dst,
                     const uint8_t *src,
                     const uint8_t *,
                     const uint8_t *,
                     int width,
                     uint32_t *coeffs,
                     void *);
void ff_rgb24ToUV_rvv(uint8_t *,
                      uint8_t *,
                      const uint8_t *,
                      const uint8_t *,
                      const uint8_t *,
                      int width,
                      uint32_t *coeffs,
                      void *);
void ff_rgb24ToUV_half_rvv(uint8_t *,
                           uint8_t *,
                           const uint8_t *,
                           const uint8_t *,
                           const uint8_t *,
                           int width,
                           uint32_t *coeffs,
                           void *);
/*
 * Declare the three RVV input converters of one pixel format:
 * ff_<fmt>ToY_rvv, ff_<fmt>ToUV_rvv and ff_<fmt>ToUV_half_rvv.
 * The expansion ends without a semicolon; the invocation supplies it.
 */
#define RVV_INPUT(fmt)                                                   \
    void ff_##fmt##ToY_rvv(uint8_t *dst, const uint8_t *src,             \
                           const uint8_t *, const uint8_t *,             \
                           int w, uint32_t *coeffs, void *);             \
    void ff_##fmt##ToUV_rvv(uint8_t *, uint8_t *,                        \
                            const uint8_t *, const uint8_t *,            \
                            const uint8_t *,                             \
                            int w, uint32_t *coeffs, void *);            \
    void ff_##fmt##ToUV_half_rvv(uint8_t *, uint8_t *,                   \
                                 const uint8_t *, const uint8_t *,       \
                                 const uint8_t *,                        \
                                 int w, uint32_t *coeffs, void *)

/* Packed 32-bit formats. */
RVV_INPUT(abgr32);
RVV_INPUT(argb32);
RVV_INPUT(bgra32);
RVV_INPUT(rgba32);

/* Packed 24-bit formats. */
RVV_INPUT(bgr24);
RVV_INPUT(rgb24);
av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
{
@ -43,6 +45,14 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
switch (c->srcFormat) {
case AV_PIX_FMT_ABGR:
c->lumToYV12 = ff_abgr32ToY_rvv;
break;
case AV_PIX_FMT_ARGB:
c->lumToYV12 = ff_argb32ToY_rvv;
break;
case AV_PIX_FMT_BGR24:
c->lumToYV12 = ff_bgr24ToY_rvv;
if (c->chrSrcHSubSample)
@ -51,6 +61,10 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
c->chrToYV12 = ff_bgr24ToUV_rvv;
break;
case AV_PIX_FMT_BGRA:
c->lumToYV12 = ff_bgra32ToY_rvv;
break;
case AV_PIX_FMT_RGB24:
c->lumToYV12 = ff_rgb24ToY_rvv;
if (c->chrSrcHSubSample)
@ -58,6 +72,10 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
else
c->chrToYV12 = ff_rgb24ToUV_rvv;
break;
case AV_PIX_FMT_RGBA:
c->lumToYV12 = ff_rgba32ToY_rvv;
break;
}
}
#endif