mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-27 09:52:17 +00:00
04d092e7d5
RV64G supports MIN & MAX instructions natively only on floating point registers, not general purpose ones. The latter would require the Zbb extension. Due to that, it is actually faster to perform the clipping "properly" in FPU. Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech): audiodsp.vector_clipf_c: 29551.5 audiodsp.vector_clipf_rvf: 17871.0 Also tried unrolling with 2 or 8 elements but it gets worse either way.
122 lines
3.9 KiB
C
122 lines
3.9 KiB
C
/*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
|
|
#include "libavutil/attributes.h"
|
|
#include "libavutil/common.h"
|
|
#include "audiodsp.h"
|
|
|
|
/**
 * Clip one value using unsigned integer comparisons only.
 *
 * @param a        value to clip
 * @param mini     returned when a compares greater than mini
 * @param maxi     returned when a, with bit 31 flipped, compares greater
 *                 than maxisign
 * @param maxisign threshold for the bit-31-flipped comparison (the caller
 *                 in this file passes maxi with bit 31 flipped)
 * @return mini, maxi or a unchanged, checked in that order
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t sign_bit = 1U << 31;

    /* The lower-bound test takes precedence over the upper-bound test. */
    if (a > mini)
        return mini;

    /* Flipping bit 31 on both sides lets one unsigned compare decide. */
    if ((a ^ sign_bit) > maxisign)
        return maxi;

    return a;
}
|
|
|
|
/**
 * Clip a float vector using integer comparisons on the values' 32-bit
 * representations. Called from vector_clipf_c() when min < 0 and max > 0.
 *
 * The float <-> uint32_t reinterpretation goes through a union. Reading a
 * float object through a uint32_t pointer, as a plain pointer cast would,
 * violates the C effective-type (strict aliasing) rules.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements; processed in groups of 8, so whole groups
 *            are always read and written
 *            NOTE(review): appears to assume len is a multiple of 8 and
 *            that float is 32 bits wide - confirm against callers/platforms.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    union { float f; uint32_t u; } lo, hi, v;
    uint32_t mini, maxi, maxisign;
    int i, j;

    lo.f = *min;
    hi.f = *max;
    mini     = lo.u;
    maxi     = hi.u;
    /* Upper bound with bit 31 flipped, as expected by clipf_c_one(). */
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        /* Fixed trip count keeps the 8-elements-per-iteration behaviour. */
        for (j = 0; j < 8; j++) {
            v.f        = src[i + j];
            v.u        = clipf_c_one(v.u, mini, maxi, maxisign);
            dst[i + j] = v.f;
        }
    }
}
|
|
|
|
/**
 * Clip every element of a float vector to [min, max].
 *
 * When the bounds have opposite signs (min < 0 and max > 0) the work is
 * handed to vector_clipf_c_opposite_sign(); otherwise each element goes
 * through av_clipf().
 *
 * @param dst destination vector
 * @param src source vector
 * @param len number of elements; handled in groups of 8
 *            NOTE(review): appears to assume len is a multiple of 8 -
 *            confirm against callers.
 * @param min lower bound
 * @param max upper bound
 */
static void vector_clipf_c(float *dst, const float *src, int len,
                           float min, float max)
{
    int base, lane;

    /* Bounds straddling zero take the integer-comparison path. */
    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        /* Always a full group of 8, regardless of how len relates to base. */
        for (lane = 0; lane < 8; lane++)
            dst[base + lane] = av_clipf(src[base + lane], min, max);
    }
}
|
|
|
|
/**
 * Compute the dot product of two int16_t vectors.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of element pairs to multiply and accumulate
 * @return the accumulated sum, converted from the unsigned accumulator
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    /* Unsigned accumulator: additions wrap instead of overflowing. */
    unsigned acc = 0;

    for (; order != 0; order--) {
        acc += *v1 * *v2;
        v1++;
        v2++;
    }

    return acc;
}
|
|
|
|
/**
 * Clip every element of an int32_t vector to [min, max] with av_clip().
 *
 * Elements are handled in groups of 8 and the first group is processed
 * before len is tested.
 * NOTE(review): appears to assume len is a nonzero multiple of 8 - len is
 * unsigned, so any other value would wrap on the subtraction; confirm
 * against callers.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    int lane;

    do {
        for (lane = 0; lane < 8; lane++)
            dst[lane] = av_clip(src[lane], min, max);

        dst += 8;
        src += 8;
        len -= 8;
    } while (len != 0);
}
|
|
|
|
/**
 * Populate an AudioDSPContext with the C implementations defined in this
 * file, then call the architecture-specific initializer selected at build
 * time, if any.
 *
 * @param c context whose function pointers are set
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
    /* Portable C defaults. */
    c->vector_clipf        = vector_clipf_c;
    c->vector_clip_int32   = vector_clip_int32_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;

    /* At most one architecture hook is compiled in; it receives the context
     * after the defaults are set (presumably to override some of them). */
#if ARCH_ARM
    ff_audiodsp_init_arm(c);
#elif ARCH_PPC
    ff_audiodsp_init_ppc(c);
#elif ARCH_RISCV
    ff_audiodsp_init_riscv(c);
#elif ARCH_X86
    ff_audiodsp_init_x86(c);
#endif
}
|