ffmpeg/libswscale/riscv/swscale.c
Rémi Denis-Courmont 417957ec5e sws/range_convert: R-V V to/from JPEG
C908   X60
chrRangeFromJpeg_8_c:          2.7    2.5
chrRangeFromJpeg_8_rvv_i32:    1.7    1.5
chrRangeFromJpeg_24_c:         7.5    6.7
chrRangeFromJpeg_24_rvv_i32:   1.7    1.5
chrRangeFromJpeg_128_c:       55.2   34.7
chrRangeFromJpeg_128_rvv_i32:  6.5    3.0
chrRangeFromJpeg_144_c:       44.0   39.2
chrRangeFromJpeg_144_rvv_i32:  7.7    4.5
chrRangeFromJpeg_256_c:       78.2   69.5
chrRangeFromJpeg_256_rvv_i32: 12.2    6.0
chrRangeFromJpeg_512_c:      172.2  138.5
chrRangeFromJpeg_512_rvv_i32: 24.5   11.7
chrRangeToJpeg_8_c:            4.7    4.2
chrRangeToJpeg_8_rvv_i32:      2.0    1.7
chrRangeToJpeg_24_c:          13.7   12.2
chrRangeToJpeg_24_rvv_i32:     2.0    1.5
chrRangeToJpeg_128_c:         72.0   63.7
chrRangeToJpeg_128_rvv_i32:    6.7    3.2
chrRangeToJpeg_144_c:         80.7   71.7
chrRangeToJpeg_144_rvv_i32:    8.5    4.7
chrRangeToJpeg_256_c:        143.2  127.2
chrRangeToJpeg_256_rvv_i32:   13.5    6.5
chrRangeToJpeg_512_c:        285.7  253.7
chrRangeToJpeg_512_rvv_i32:   27.0   13.0
lumRangeFromJpeg_8_c:          1.7    1.5
lumRangeFromJpeg_8_rvv_i32:    1.2    1.0
lumRangeFromJpeg_24_c:         4.2    3.7
lumRangeFromJpeg_24_rvv_i32:   1.2    1.0
lumRangeFromJpeg_128_c:       21.7   19.2
lumRangeFromJpeg_128_rvv_i32:  3.7    1.7
lumRangeFromJpeg_144_c:       24.7   22.0
lumRangeFromJpeg_144_rvv_i32:  4.7    2.7
lumRangeFromJpeg_256_c:       43.7   39.0
lumRangeFromJpeg_256_rvv_i32:  7.5    3.2
lumRangeFromJpeg_512_c:       87.0   77.2
lumRangeFromJpeg_512_rvv_i32: 14.5    6.7
lumRangeToJpeg_8_c:            2.7    2.2
lumRangeToJpeg_8_rvv_i32:      1.0    1.0
lumRangeToJpeg_24_c:           7.2    6.5
lumRangeToJpeg_24_rvv_i32:     1.2    1.0
lumRangeToJpeg_128_c:         37.7   33.7
lumRangeToJpeg_128_rvv_i32:    3.7    2.0
lumRangeToJpeg_144_c:         42.5   37.7
lumRangeToJpeg_144_rvv_i32:    4.7    2.7
lumRangeToJpeg_256_c:         75.0   66.7
lumRangeToJpeg_256_rvv_i32:    7.5    3.5
lumRangeToJpeg_512_c:        149.5  133.0
lumRangeToJpeg_512_rvv_i32:   14.7    7.0
2024-06-10 22:48:52 +03:00

127 lines
4.4 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/riscv/cpu.h"
#include "libswscale/swscale_internal.h"
/*
 * Range-conversion routines that ff_sws_init_range_convert_riscv() installs
 * into the scaler context. They are only declared here; their definitions
 * are not part of the visible portion of this file.
 *
 * The "lum" variants take one int16_t buffer and an int; the "chr" variants
 * take two int16_t buffers and an int.
 * NOTE(review): the int is presumably the number of samples to process and
 * the buffers are presumably converted in place - confirm against the
 * implementation.
 */
void ff_range_lum_to_jpeg_16_rvv(int16_t *, int);
void ff_range_chr_to_jpeg_16_rvv(int16_t *, int16_t *, int);
void ff_range_lum_from_jpeg_16_rvv(int16_t *, int);
void ff_range_chr_from_jpeg_16_rvv(int16_t *, int16_t *, int);
/*
 * Install the RVV range-conversion routines into a scaler context when they
 * are applicable.
 *
 * The context is left untouched unless all of the following hold:
 *  - c->srcRange differs from c->dstRange,
 *  - isAnyRGB() reports false for c->dstFormat,
 *  - c->dstBpc is at most 14,
 *  - both AV_CPU_FLAG_RVV_I32 and AV_CPU_FLAG_RVB_ADDR are set in flags.
 *
 * When the file is built with HAVE_RVV equal to 0 this function does nothing.
 *
 * c:     scaler context whose lumConvertRange / chrConvertRange may be set
 * flags: CPU feature flags supplied by the caller
 */
av_cold static void ff_sws_init_range_convert_riscv(SwsContext *c, int flags)
{
#if HAVE_RVV
    /* Entry 0 holds the to_jpeg pair, entry 1 the from_jpeg pair. */
    static const struct {
        void (*lum)(int16_t *, int);
        void (*chr)(int16_t *, int16_t *, int);
    } range_funcs[2] = {
        {
            .lum = ff_range_lum_to_jpeg_16_rvv,
            .chr = ff_range_chr_to_jpeg_16_rvv,
        },
        {
            .lum = ff_range_lum_from_jpeg_16_rvv,
            .chr = ff_range_chr_from_jpeg_16_rvv,
        },
    };

    /* No conversion is needed when both ranges already agree. */
    if (c->srcRange == c->dstRange)
        return;

    /* These routines are not used for RGB destinations or depths above 14. */
    if (isAnyRGB(c->dstFormat) || c->dstBpc > 14)
        return;

    /* Both CPU capabilities are required. */
    if (!(flags & AV_CPU_FLAG_RVV_I32) || !(flags & AV_CPU_FLAG_RVB_ADDR))
        return;

    /* A non-zero source range selects the from_jpeg entry. */
    const int idx = c->srcRange != 0;

    c->lumConvertRange = range_funcs[idx].lum;
    c->chrConvertRange = range_funcs[idx].chr;
#endif
}
/*
 * RVV_INPUT(name) declares three input-conversion routines for one source
 * pixel layout:
 *   ff_<name>ToY_rvv       - selected as lumToYV12,
 *   ff_<name>ToUV_rvv      - selected as chrToYV12 when chrSrcHSubSample is 0,
 *   ff_<name>ToUV_half_rvv - selected as chrToYV12 when chrSrcHSubSample is
 *                            non-zero.
 * Only the declarations live here; the definitions are not part of the
 * visible portion of this file.
 */
#define RVV_INPUT(name) \
void ff_##name##ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *, \
const uint8_t *, int w, uint32_t *coeffs, void *); \
void ff_##name##ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, \
const uint8_t *, const uint8_t *, int w, \
uint32_t *coeffs, void *); \
void ff_##name##ToUV_half_rvv(uint8_t *, uint8_t *, const uint8_t *, \
const uint8_t *, const uint8_t *, int w, \
uint32_t *coeffs, void *)
/* One instantiation per source format handled in ff_sws_init_swscale_riscv(). */
RVV_INPUT(abgr32);
RVV_INPUT(argb32);
RVV_INPUT(bgr24);
RVV_INPUT(bgra32);
RVV_INPUT(rgb24);
RVV_INPUT(rgba32);
/*
 * Hook the RISC-V specific routines into a scaler context.
 *
 * The CPU flags are queried once with av_get_cpu_flags(). When the file is
 * built with HAVE_RVV and both AV_CPU_FLAG_RVV_I32 and AV_CPU_FLAG_RVB_ADDR
 * are reported, the lumToYV12 / chrToYV12 pointers are replaced for the
 * ABGR, ARGB, BGR24, BGRA, RGB24 and RGBA source formats; any other source
 * format leaves them as they were. The chroma routine is the "half" variant
 * when c->chrSrcHSubSample is non-zero and the plain variant otherwise.
 *
 * The range-conversion setup is always invoked last, with the same flags.
 *
 * c: scaler context to update
 */
av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
{
    const int cpu_flags = av_get_cpu_flags();

#if HAVE_RVV
    const int rvv_usable = (cpu_flags & AV_CPU_FLAG_RVV_I32) &&
                           (cpu_flags & AV_CPU_FLAG_RVB_ADDR);

    if (rvv_usable) {
        switch (c->srcFormat) {
        case AV_PIX_FMT_ABGR:
            c->lumToYV12 = ff_abgr32ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_abgr32ToUV_half_rvv
                                               : ff_abgr32ToUV_rvv;
            break;

        case AV_PIX_FMT_ARGB:
            c->lumToYV12 = ff_argb32ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_argb32ToUV_half_rvv
                                               : ff_argb32ToUV_rvv;
            break;

        case AV_PIX_FMT_BGR24:
            c->lumToYV12 = ff_bgr24ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_bgr24ToUV_half_rvv
                                               : ff_bgr24ToUV_rvv;
            break;

        case AV_PIX_FMT_BGRA:
            c->lumToYV12 = ff_bgra32ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_bgra32ToUV_half_rvv
                                               : ff_bgra32ToUV_rvv;
            break;

        case AV_PIX_FMT_RGB24:
            c->lumToYV12 = ff_rgb24ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_rgb24ToUV_half_rvv
                                               : ff_rgb24ToUV_rvv;
            break;

        case AV_PIX_FMT_RGBA:
            c->lumToYV12 = ff_rgba32ToY_rvv;
            c->chrToYV12 = c->chrSrcHSubSample ? ff_rgba32ToUV_half_rvv
                                               : ff_rgba32ToUV_rvv;
            break;

        default:
            /* Other source formats keep the pointers already present. */
            break;
        }
    }
#endif

    ff_sws_init_range_convert_riscv(c, cpu_flags);
}