lavc/idctdsp: RISC-V V add_pixels_clamped function

This commit is contained in:
Rémi Denis-Courmont 2022-09-27 23:04:23 +03:00 committed by Lynne
parent b29ee63a1b
commit fa983b5656
2 changed files with 21 additions and 1 deletion

View File

@ -28,6 +28,8 @@
void ff_put_pixels_clamped_rvv(const int16_t *block, uint8_t *pixels,
ptrdiff_t stride);
void ff_add_pixels_clamped_rvv(const int16_t *block, uint8_t *pixels,
ptrdiff_t stride);
av_cold void ff_idctdsp_init_riscv(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
@ -35,7 +37,9 @@ av_cold void ff_idctdsp_init_riscv(IDCTDSPContext *c, AVCodecContext *avctx,
#if HAVE_RVV
int flags = av_get_cpu_flags();
if ((flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16)
if ((flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
c->put_pixels_clamped = ff_put_pixels_clamped_rvv;
c->add_pixels_clamped = ff_add_pixels_clamped_rvv;
}
#endif
}

View File

@ -24,6 +24,7 @@
func ff_put_pixels_clamped_rvv, zve32x
vsetivli zero, 8, e16, m1, ta, ma
vlseg8e16.v v24, (a0)
1:
/* RVV only has signed-signed and unsigned-unsigned clipping.
* We need two steps for signed-to-unsigned clipping. */
vsetvli t0, zero, e16, m8, ta, ma
@ -41,3 +42,18 @@ func ff_put_pixels_clamped_rvv, zve32x
vssseg8e8.v v16, (a1), a2
ret
endfunc
/*
 * void ff_add_pixels_clamped_rvv(const int16_t *block, uint8_t *pixels,
 *                                ptrdiff_t stride);
 *
 * Adds 64 16-bit coefficients read from block to an 8x8 group of 8-bit
 * pixels (8 rows of 8 bytes, rows stride bytes apart), then hands over to
 * the tail of ff_put_pixels_clamped_rvv for clipping and the store.
 *
 * Register use, assuming the standard RISC-V calling convention:
 * a0 = block, a1 = pixels, a2 = stride.
 */
func ff_add_pixels_clamped_rvv, zve32x
/* VL = 8 elements, SEW = 8 bits, LMUL = 1/2, tail- and mask-agnostic.
 * The 16-bit load and the widening adds below therefore work on 16-bit
 * elements with an effective LMUL of 1 (one vector register per field). */
vsetivli zero, 8, e8, mf2, ta, ma
/* Unit-stride segment load of 8 segments with 8 int16 fields each from
 * (a0): field k of every segment is placed in v(24 + k), i.e. v24..v31. */
vlseg8e16.v v24, (a0)
/* Strided segment load of 8 segments with 8 byte fields each, segments
 * starting a2 bytes apart from (a1): field k of every segment is placed
 * in v(16 + k), i.e. v16..v23, mirroring the layout of v24..v31. */
vlsseg8e8.v v16, (a1), a2
/* Widening unsigned add, one instruction per field: each 16-bit element of
 * v(24 + k) gets the zero-extended 8-bit element of v(16 + k) added to it.
 * The sums stay 16 bits wide; no saturation happens at this point. */
vwaddu.wv v24, v24, v16
vwaddu.wv v25, v25, v17
vwaddu.wv v26, v26, v18
vwaddu.wv v27, v27, v19
vwaddu.wv v28, v28, v20
vwaddu.wv v29, v29, v21
vwaddu.wv v30, v30, v22
vwaddu.wv v31, v31, v23
/* Branch backwards to the nearest preceding local label "1", which is the
 * one inside ff_put_pixels_clamped_rvv: that shared tail does the
 * signed-to-unsigned clipping of v24..v31, stores the result to (a1) with
 * stride a2 and returns. This function must therefore stay below that
 * label with no other "1:" label in between. */
j 1b
endfunc