sws/rgb2rgb: RISC-V V shuffle_bytes_xxxx functions

This commit is contained in:
Rémi Denis-Courmont 2022-09-28 18:29:59 +03:00 committed by Lynne
parent e5f6918cbd
commit 66a03f4053
5 changed files with 130 additions and 0 deletions

View File

@ -139,6 +139,8 @@ av_cold void ff_sws_rgb2rgb_init(void)
rgb2rgb_init_c();
#if ARCH_AARCH64
rgb2rgb_init_aarch64();
#elif ARCH_RISCV
rgb2rgb_init_riscv();
#elif ARCH_X86
rgb2rgb_init_x86();
#elif ARCH_LOONGARCH64

View File

@ -167,6 +167,7 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
void ff_sws_rgb2rgb_init(void);
void rgb2rgb_init_aarch64(void);
void rgb2rgb_init_riscv(void);
void rgb2rgb_init_x86(void);
void rgb2rgb_init_loongarch(void);

View File

@ -0,0 +1,2 @@
# RISC-V objects for the rgb2rgb converters.
OBJS += riscv/rgb2rgb.o
# NOTE(review): RVV-OBJS is presumably only built when RISC-V Vector support
# is enabled at configure time -- confirm against the build system.
RVV-OBJS += riscv/rgb2rgb_rvv.o

View File

@ -0,0 +1,47 @@
/*
* Copyright © 2022 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libswscale/rgb2rgb.h"
/*
 * RISC-V Vector byte-shuffle routines, implemented in assembly.
 *
 * Each routine rewrites the input in groups of 4 bytes. The four digits in
 * the name give, for destination bytes 0..3 of every group, the index of the
 * source byte copied there (e.g. 2103: dst[0] = src[2], dst[1] = src[1],
 * dst[2] = src[0], dst[3] = src[3]).
 * src_len is divided by 4 to obtain the number of groups to process.
 */
void ff_shuffle_bytes_0321_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
/**
 * Install the RISC-V specific rgb2rgb routines.
 *
 * When the build has HAVE_RVV set and the CPU flags include
 * AV_CPU_FLAG_RVV_I32, the five shuffle_bytes_* function pointers are
 * redirected to their ff_shuffle_bytes_*_rvv counterparts. In every other
 * case the pointers are left as they were.
 */
av_cold void rgb2rgb_init_riscv(void)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* Nothing to install without 32-bit integer vector support. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32)) {
        return;
    }

    shuffle_bytes_0321 = ff_shuffle_bytes_0321_rvv;
    shuffle_bytes_2103 = ff_shuffle_bytes_2103_rvv;
    shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv;
    shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
    shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
#endif
}

View File

@ -0,0 +1,78 @@
/*
* Copyright © 2022 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/riscv/asm.S"
/*
 * void ff_shuffle_bytes_0321_rvv(const uint8_t *src, uint8_t *dst,
 *                                int src_len);
 *
 * a0 = src, a1 = dst, a2 = src_len.
 *
 * For every 4-byte group: dst[0] = src[0], dst[1] = src[3],
 * dst[2] = src[2], dst[3] = src[1].
 * src_len is shifted right by 2 to get the group count, so up to three
 * trailing bytes are not processed.
 *
 * Local label 1 is also the entry point of the shared loop used by the
 * other ff_shuffle_bytes_*_rvv functions in this file. They arrive with
 * a0, t1, t2 and t3 pointing at the source bytes that feed destination
 * bytes 0, 1, 2 and 3 respectively.
 *
 * Only base-ISA integer instructions are used for pointer arithmetic:
 * the function is gated on vector support alone, so it must not rely on
 * the Zba address-generation extension (sh2add).
 */
func ff_shuffle_bytes_0321_rvv, zve32x
        addi    t1, a0, 3       /* feeds destination byte 1 */
        addi    t2, a0, 2       /* feeds destination byte 2 */
        addi    t3, a0, 1       /* feeds destination byte 3 */
1:
        srai    a2, a2, 2       /* length -> number of 4-byte groups */
        li      t4, 4           /* stride between same-position bytes */
2:
        vsetvli t0, a2, e8, m1, ta, ma
        sub     a2, a2, t0      /* groups left after this pass */
        slli    t5, t0, 2       /* bytes covered by this pass (4 per group) */
        vlse8.v v8, (a0), t4
        add     a0, a0, t5
        vlse8.v v9, (t1), t4
        add     t1, t1, t5
        vlse8.v v10, (t2), t4
        add     t2, t2, t5
        vlse8.v v11, (t3), t4
        add     t3, t3, t5
        vsseg4e8.v v8, (a1)     /* interleave v8..v11 into 4-byte groups */
        add     a1, a1, t5
        bnez    a2, 2b
        ret
endfunc
/*
 * ff_shuffle_bytes_2103_rvv: for every 4-byte group,
 * dst[0] = src[2], dst[1] = src[1], dst[2] = src[0], dst[3] = src[3].
 *
 * Only sets up the four source pointers, then branches to the shared loop
 * at local label 1 inside ff_shuffle_bytes_0321_rvv.
 */
func ff_shuffle_bytes_2103_rvv, zve32x
        addi    t3, a0, 3       /* destination byte 3 <- source byte 3 */
        mv      t2, a0          /* destination byte 2 <- source byte 0 */
        addi    t1, a0, 1       /* destination byte 1 <- source byte 1 */
        /* a0 is adjusted last because the lines above read its old value. */
        addi    a0, a0, 2       /* destination byte 0 <- source byte 2 */
        j       1b
endfunc
/*
 * ff_shuffle_bytes_1230_rvv: for every 4-byte group,
 * dst[0] = src[1], dst[1] = src[2], dst[2] = src[3], dst[3] = src[0].
 *
 * Only sets up the four source pointers, then branches to the shared loop
 * at local label 1 inside ff_shuffle_bytes_0321_rvv.
 */
func ff_shuffle_bytes_1230_rvv, zve32x
        mv      t3, a0          /* destination byte 3 <- source byte 0 */
        addi    t2, a0, 3       /* destination byte 2 <- source byte 3 */
        addi    t1, a0, 2       /* destination byte 1 <- source byte 2 */
        /* a0 is adjusted last because the lines above read its old value. */
        addi    a0, a0, 1       /* destination byte 0 <- source byte 1 */
        j       1b
endfunc
/*
 * ff_shuffle_bytes_3012_rvv: for every 4-byte group,
 * dst[0] = src[3], dst[1] = src[0], dst[2] = src[1], dst[3] = src[2].
 *
 * Only sets up the four source pointers, then branches to the shared loop
 * at local label 1 inside ff_shuffle_bytes_0321_rvv.
 */
func ff_shuffle_bytes_3012_rvv, zve32x
        addi    t3, a0, 2       /* destination byte 3 <- source byte 2 */
        addi    t2, a0, 1       /* destination byte 2 <- source byte 1 */
        mv      t1, a0          /* destination byte 1 <- source byte 0 */
        /* a0 is adjusted last because the lines above read its old value. */
        addi    a0, a0, 3       /* destination byte 0 <- source byte 3 */
        j       1b
endfunc
/*
 * ff_shuffle_bytes_3210_rvv: for every 4-byte group,
 * dst[0] = src[3], dst[1] = src[2], dst[2] = src[1], dst[3] = src[0]
 * (byte order reversed within each group).
 *
 * Only sets up the four source pointers, then branches to the shared loop
 * at local label 1 inside ff_shuffle_bytes_0321_rvv.
 */
func ff_shuffle_bytes_3210_rvv, zve32x
        mv      t3, a0          /* destination byte 3 <- source byte 0 */
        addi    t2, a0, 1       /* destination byte 2 <- source byte 1 */
        addi    t1, a0, 2       /* destination byte 1 <- source byte 2 */
        /* a0 is adjusted last because the lines above read its old value. */
        addi    a0, a0, 3       /* destination byte 0 <- source byte 3 */
        j       1b
endfunc