avcodec/ac3: Implement ac3_extract_exponents for aarch64 NEON

Signed-off-by: Geoff Hill <geoff@geoffhill.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Geoff Hill 2024-04-06 07:26:01 -07:00 committed by Martin Storsjö
parent 6f6bd10531
commit 69cb34f885
3 changed files with 54 additions and 0 deletions

View File

@ -26,6 +26,7 @@
#include "config.h"
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
@ -34,5 +35,6 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
if (!have_neon(cpu_flags)) return;
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
}

View File

@ -37,6 +37,20 @@ function ff_ac3_exponent_min_neon, export=1
3: ret
endfunc
// void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs)
//
// Register use, following the C prototype: x0 = exp (byte output),
// x1 = coef (32-bit input), w2 = nb_coefs.
//
// For every coefficient, stores clz(|coef|) - 8 as one byte, handling four
// coefficients per loop iteration. A zero coefficient yields 32 - 8 = 24.
// NOTE(review): the "- 8" presumably assumes coefficients fit in 24 bits,
// so the narrowed result stays in 0..24 — confirm against the C reference.
//
// The count is tested only after the loop body has run, so at least four
// elements are always read and written, and the count is effectively
// rounded up to a multiple of 4.
function ff_ac3_extract_exponents_neon, export=1
movi v1.4s, #8 // v1 = 8 in each of the four 32-bit lanes
1: ld1 {v0.4s}, [x1], #16 // load 4 coefficients, advance coef by 16 bytes
abs v0.4s, v0.4s // per-lane absolute value
clz v0.4s, v0.4s // per-lane count of leading zero bits (0..32)
sub v0.4s, v0.4s, v1.4s // subtract 8 from each lane
xtn v0.4h, v0.4s // narrow 32-bit lanes to 16-bit
xtn v0.8b, v0.8h // narrow 16-bit lanes to 8-bit; results are in the low 4 bytes
st1 {v0.s}[0], [x0], #4 // store those 4 bytes, advance exp by 4
subs w2, w2, #4 // 4 coefficients consumed; sets flags for the branch
b.gt 1b // loop while the remaining count is > 0
ret
endfunc
function ff_float_to_fixed24_neon, export=1
1: ld1 {v0.4s, v1.4s}, [x1], #32
fcvtzs v0.4s, v0.4s, #24

View File

@ -19,6 +19,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <stdint.h>
#include <string.h>
#include "libavutil/mem.h"
@ -36,6 +37,16 @@
} \
} while (0)
/*
 * Fill buf[0] .. buf[len - 1] with signed values in [-0xFFFFFF, 0xFFFFFF]
 * drawn from rnd(): the low 24 bits of each draw give the magnitude and
 * the sign of the draw (viewed as int32_t) gives the sign.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments such as `ptr + off` or `a + b` expand as intended. `len` is
 * re-evaluated on every loop test and `buf` on every store, so neither
 * argument should have side effects.
 */
#define randomize_i24(buf, len)                  \
    do {                                         \
        int i;                                   \
        for (i = 0; i < (len); i++) {            \
            int32_t v = (int32_t)rnd();          \
            int32_t u = (v & 0xFFFFFF);          \
            (buf)[i] = (v < 0) ? -u : u;         \
        }                                        \
    } while (0)
#define randomize_float(buf, len) \
do { \
int i; \
@ -77,6 +88,32 @@ static void check_ac3_exponent_min(AC3DSPContext *c) {
report("ac3_exponent_min");
}
/*
 * checkasm test for AC3DSPContext.extract_exponents.
 *
 * For coefficient counts 512, 768, ..., MAX_EXPS, fills the input with
 * randomize_i24() values, runs the reference and the implementation under
 * test on the same input, fails if the first `nb_coefs` output bytes
 * differ, and then benchmarks the implementation under test.
 */
static void check_ac3_extract_exponents(AC3DSPContext *c) {
#define MAX_EXPS 3072
    LOCAL_ALIGNED_16(int32_t, coefs, [MAX_EXPS]);
    LOCAL_ALIGNED_16(uint8_t, exp_ref, [MAX_EXPS]);
    LOCAL_ALIGNED_16(uint8_t, exp_new, [MAX_EXPS]);
    int nb_coefs;

    declare_func(void, uint8_t *, int32_t *, int);

    for (nb_coefs = 512; nb_coefs <= MAX_EXPS; nb_coefs += 256) {
        /* Nothing to do for this size when check_func() reports false. */
        if (!check_func(c->extract_exponents, "ac3_extract_exponents_n%d", nb_coefs))
            continue;

        randomize_i24(coefs, nb_coefs);

        call_ref(exp_ref, coefs, nb_coefs);
        call_new(exp_new, coefs, nb_coefs);

        /* Only the first nb_coefs output bytes are compared. */
        if (memcmp(exp_ref, exp_new, nb_coefs))
            fail();

        bench_new(exp_ref, coefs, nb_coefs);
    }

    report("ac3_extract_exponents");
}
static void check_float_to_fixed24(AC3DSPContext *c) {
#define BUF_SIZE 1024
LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
@ -108,5 +145,6 @@ void checkasm_check_ac3dsp(void)
ff_ac3dsp_init(&c);
check_ac3_exponent_min(&c);
check_ac3_extract_exponents(&c);
check_float_to_fixed24(&c);
}