diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 32c1546827..37a2cd5ea1 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -33,6 +33,12 @@ void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
 void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dst, int width, int height, int s1stride,
                              int s2stride, int dstride);
+void ff_uyvytoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                         const uint8_t *src, int width, int height,
+                         int ystride, int uvstride, int src_stride); /* packed U,Y0,V,Y1 -> separate Y/U/V planes; defined in rgb2rgb_rvv.S */
+void ff_yuyvtoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+                         const uint8_t *src, int width, int height,
+                         int ystride, int uvstride, int src_stride); /* packed Y0,U,Y1,V -> separate Y/U/V planes; defined in rgb2rgb_rvv.S */
 
 av_cold void rgb2rgb_init_riscv(void)
 {
@@ -46,6 +52,10 @@ av_cold void rgb2rgb_init_riscv(void)
         shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
         shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
         interleaveBytes = ff_interleave_bytes_rvv;
+#if (__riscv_xlen == 64) /* same guard as in rgb2rgb_rvv.S: the two routines below are only assembled for 64-bit targets */
+        uyvytoyuv422 = ff_uyvytoyuv422_rvv; /* install the vector version of the UYVY unpacker */
+        yuyvtoyuv422 = ff_yuyvtoyuv422_rvv; /* install the vector version of the YUYV unpacker */
+#endif
     }
 #endif
 }
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 7f8c2efd80..5626d906eb 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -102,3 +102,56 @@ func ff_interleave_bytes_rvv, zve32x
 
         ret
 endfunc
+
+#if (__riscv_xlen == 64) // body uses sd/ld (64-bit register spills), so it is limited to 64-bit targets
+.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v // complete function body: split packed 4:2:2 rows into Y/U/V planes; the arguments name which of v8..v11 hold Y0, Y1, U and V
+        addi    sp, sp, -16 // reserve 16 bytes of stack for the two saved registers
+        sd      s0,   (sp) // spill s0 (restored before ret)
+        sd      s1,  8(sp) // spill s1 (restored before ret)
+        addi    a4, a4, 1 // width + 1, so the shift below rounds up
+        lw      s0, 16(sp) // 32-bit word just above the new frame (i.e. at the entry sp); presumably the ninth argument, src_stride - confirm against the calling convention
+        srai    a4, a4, 1 // pixel width -> chroma width (rounded up thanks to the +1 above)
+        li      s1, 2 // byte stride used by the strided luma stores
+1: // per-row loop
+        mv      t4, a4 // t4 = chroma samples still to process in this row
+        mv      t3, a3 // t3 = read cursor in the packed source row (a3 is assumed to carry src - confirm)
+        mv      t0, a0 // t0 = write cursor for even luma bytes (a0 is assumed to carry ydst - confirm)
+        addi    t6, a0, 1 // t6 = write cursor for odd luma bytes, one byte after t0
+        mv      t1, a1 // t1 = write cursor for the first chroma plane (stores \v_u)
+        mv      t2, a2 // t2 = write cursor for the second chroma plane (stores \v_v)
+        addi    a5, a5, -1 // count this row; tested at the bottom, so the row body always runs at least once - NOTE(review): no early exit for height <= 0
+2: // per-vector loop within the row
+        vsetvli    t5, t4, e8, m1, ta, ma // t5 = number of 8-bit elements handled this pass, at most t4
+        sub        t4, t4, t5 // fewer chroma samples remain
+        vlseg4e8.v v8, (t3) // de-interleave t5 four-byte groups: byte 0 -> v8, byte 1 -> v9, byte 2 -> v10, byte 3 -> v11
+        sh2add     t3, t5, t3 // t3 += 4 * t5 (four source bytes per group); NOTE(review): sh2add is a Zba mnemonic while func only requests zve32x - confirm it is available or emulated
+        vsse8.v    \v_y0, (t0), s1 // store first luma of each group at t0, t0+2, t0+4, ...
+        sh1add     t0, t5, t0 // t0 += 2 * t5
+        vsse8.v    \v_y1, (t6), s1 // store second luma of each group at t6, t6+2, ... (fills the odd bytes); NOTE(review): together these write 2 * a4 luma bytes per row, one more than width when width is odd
+        sh1add     t6, t5, t6 // t6 += 2 * t5
+        vse8.v     \v_u, (t1) // store t5 contiguous bytes of the first chroma component
+        add        t1, t5, t1 // t1 += t5
+        vse8.v     \v_v, (t2) // store t5 contiguous bytes of the second chroma component
+        add        t2, t5, t2 // t2 += t5
+        bnez       t4, 2b // repeat until the whole row has been consumed
+
+        add     a3, a3, s0 // advance the source row base by the value loaded from the stack
+        add     a0, a0, a6 // advance the luma row base by a6 (assumed ystride - confirm)
+        add     a1, a1, a7 // advance the first chroma row base by a7 (assumed uvstride - confirm)
+        add     a2, a2, a7 // advance the second chroma row base by the same a7
+        bnez    a5, 1b // next row until the counter reaches zero
+
+        ld      s1,  8(sp) // restore s1
+        ld      s0,   (sp) // restore s0
+        addi    sp, sp, 16 // release the stack frame
+        ret
+.endm
+
+func ff_uyvytoyuv422_rvv, zve32x // bytes 1 and 3 of each group are luma, byte 0 is U, byte 2 is V
+        yuy2_to_i422p v9, v11, v8, v10
+endfunc
+
+func ff_yuyvtoyuv422_rvv, zve32x // bytes 0 and 2 of each group are luma, byte 1 is U, byte 3 is V
+        yuy2_to_i422p v8, v10, v9, v11
+endfunc
+#endif