From fd4977c87637f8ae3a48df555270c585d0667132 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Fri, 31 May 2024 22:17:19 +0300
Subject: [PATCH] lavc/aacencdsp: R-V V quant_bands

T-Head C908:
quant_bands_signed_c:         576.0
quant_bands_signed_rvv_f32:    48.7
quant_bands_unsigned_c:       414.2
quant_bands_unsigned_rvv_f32:  31.7

SpacemiT X60:
quant_bands_signed_c:         497.7
quant_bands_signed_rvv_f32:    23.0
quant_bands_unsigned_c:       353.5
quant_bands_unsigned_rvv_f32:  16.2
---
 libavcodec/riscv/aacencdsp_init.c |  3 +++
 libavcodec/riscv/aacencdsp_rvv.S  | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/libavcodec/riscv/aacencdsp_init.c b/libavcodec/riscv/aacencdsp_init.c
index b27af9d973..73bc8d8fa3 100644
--- a/libavcodec/riscv/aacencdsp_init.c
+++ b/libavcodec/riscv/aacencdsp_init.c
@@ -26,6 +26,8 @@
 #include "libavcodec/aacencdsp.h"
 
 void ff_abs_pow34_rvv(float *out, const float *in, const int size);
+void ff_aac_quant_bands_rvv(int *, const float *, const float *, int, int,
+                            int, const float, const float);
 
 av_cold void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s)
 {
@@ -35,6 +37,7 @@ av_cold void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s)
     if (flags & AV_CPU_FLAG_RVV_F32) {
         if (flags & AV_CPU_FLAG_RVB_ADDR) {
             s->abs_pow34 = ff_abs_pow34_rvv;
+            s->quant_bands = ff_aac_quant_bands_rvv;
         }
     }
 #endif
diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S
index 4c7a874d77..ddb4155398 100644
--- a/libavcodec/riscv/aacencdsp_rvv.S
+++ b/libavcodec/riscv/aacencdsp_rvv.S
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ * Copyright © 2024 Rémi Denis-Courmont.
  *
  * This file is part of FFmpeg.
  *
@@ -36,3 +37,42 @@ func ff_abs_pow34_rvv, zve32f
 
         ret
 endfunc
+
+/*
+ * Quantize floats to 32-bit integers. Each output element is
+ * min(value * scale + offset, (float)bound) truncated toward zero; in the
+ * signed variant the sign is flipped where the sign-source element is negative.
+ * The loop body is emitted twice by .irp: .L0 (unsigned) and .L1 (signed).
+ *
+ * a0: destination, a1: sign source (read only by the signed variant),
+ * a2: values, a3: element count, a4: non-zero selects the signed variant,
+ * a5: integer bound, fa0: scale, fa1: offset (taken from a6/a7 under NOHWF).
+ */
+func ff_aac_quant_bands_rvv, zve32f
+NOHWF   fmv.w.x     fa0, a6
+NOHWF   fmv.w.x     fa1, a7
+        fcvt.s.w    ft0, a5  # bound as float, used as the clamp
+        bnez        a4, .L1  # signed?
+        .irp    signed, 0, 1
+.L\signed:
+        vsetvli     t0, a3, e32, m8, ta, ma
+        vle32.v     v16, (a2)
+        sub         a3, a3, t0
+        .if     \signed
+        vle32.v     v8, (a1)
+        sh2add      a1, t0, a1
+        .endif
+        vfmul.vf    v16, v16, fa0
+        sh2add      a2, t0, a2
+        vfadd.vf    v16, v16, fa1
+        vfmin.vf    v16, v16, ft0
+        .if     \signed
+        vfsgnjx.vv  v16, v16, v8  # XOR the sign bit of the sign source in
+        .endif
+        vfcvt.rtz.x.f.v v16, v16  # truncate: offset was already added, ignore frm
+        vse32.v     v16, (a0)
+        sh2add      a0, t0, a0
+        bnez        a3, .L\signed
+
+        ret
+        .endr
+endfunc