mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org> Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
69cb34f885
commit
42e88f18f3
|
@ -28,6 +28,10 @@
|
||||||
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
|
||||||
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
|
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||||
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
|
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
|
||||||
|
/*
 * NEON routine that accumulates four 64-bit sums of squares over two
 * int32 coefficient arrays and stores them to sum[0..3]:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 * The add/subtract is done in 32-bit lanes before the widening multiply.
 * NOTE(review): the implementation consumes coefficients two at a time;
 * len is presumably expected to be positive and even - confirm with callers.
 */
void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
|
||||||
|
const int32_t *coef0,
|
||||||
|
const int32_t *coef1,
|
||||||
|
int len);
|
||||||
|
|
||||||
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
||||||
{
|
{
|
||||||
|
@ -37,4 +41,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
|
||||||
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
|
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
|
||||||
c->extract_exponents = ff_ac3_extract_exponents_neon;
|
c->extract_exponents = ff_ac3_extract_exponents_neon;
|
||||||
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
c->float_to_fixed24 = ff_float_to_fixed24_neon;
|
||||||
|
c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,3 +64,26 @@ function ff_float_to_fixed24_neon, export=1
|
||||||
b.ne 1b
|
b.ne 1b
|
||||||
ret
|
ret
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
// Accumulate four 64-bit sums of squares over two int32 arrays.
// Register use, per the C prototype (int64_t sum[4], const int32_t *coef0,
// const int32_t *coef1, int len) under the standard AArch64 calling
// convention: x0 = sum, x1 = coef0, x2 = coef1, w3 = len.
// v0..v3 each hold two 64-bit partial sums that are folded together at the end.
// NOTE(review): the loop body runs before the count is tested and the count
// drops by 2 per iteration, so len <= 0 still processes one pair and an odd
// len reads one element past the end - presumably callers guarantee a
// positive, even len; confirm.
function ff_ac3_sum_square_butterfly_int32_neon, export=1
|
||||||
|
// Clear the four accumulators.
movi v0.2d, #0
|
||||||
|
movi v1.2d, #0
|
||||||
|
movi v2.2d, #0
|
||||||
|
movi v3.2d, #0
|
||||||
|
// Load two 32-bit coefficients from each input, post-incrementing by 8 bytes.
1: ld1 {v4.2s}, [x1], #8
|
||||||
|
ld1 {v5.2s}, [x2], #8
|
||||||
|
// 32-bit lane-wise sum and difference of the two inputs.
add v6.2s, v4.2s, v5.2s
|
||||||
|
sub v7.2s, v4.2s, v5.2s
|
||||||
|
// Signed widening multiply-accumulate: square each 32-bit lane into 64 bits.
smlal v0.2d, v4.2s, v4.2s
|
||||||
|
smlal v1.2d, v5.2s, v5.2s
|
||||||
|
smlal v2.2d, v6.2s, v6.2s
|
||||||
|
smlal v3.2d, v7.2s, v7.2s
|
||||||
|
// Two elements consumed; loop while the remaining count is > 0.
subs w3, w3, #2
|
||||||
|
b.gt 1b
|
||||||
|
// Fold the two 64-bit lanes of each accumulator into a single scalar.
addp d0, v0.2d
|
||||||
|
addp d1, v1.2d
|
||||||
|
addp d2, v2.2d
|
||||||
|
addp d3, v3.2d
|
||||||
|
// Store the four 64-bit results consecutively at [x0].
st1 {v0.1d-v3.1d}, [x0]
|
||||||
|
ret
|
||||||
|
endfunc
|
||||||
|
|
|
@ -139,6 +139,32 @@ static void check_float_to_fixed24(AC3DSPContext *c) {
|
||||||
report("float_to_fixed24");
|
report("float_to_fixed24");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * checkasm test for AC3DSPContext.sum_square_butterfly_int32.
 *
 * Fills two ELEMS-long int32 buffers via randomize_i24(), runs the
 * reference and the implementation under test on the same input, and
 * fails if the four 64-bit sums differ byte-for-byte. The implementation
 * under test is then benchmarked.
 *
 * @param c DSP context whose sum_square_butterfly_int32 pointer is tested
 */
static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
#define ELEMS 240
    LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]);
    LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]);
    /* Result buffers use int64_t so they match the declared
     * int64_t[4] parameter type instead of an incompatible uint64_t*. */
    LOCAL_ALIGNED_16(int64_t, v1, [4]);
    LOCAL_ALIGNED_16(int64_t, v2, [4]);

    declare_func(void, int64_t[4], const int32_t *, const int32_t *, int);

    randomize_i24(lt, ELEMS);
    randomize_i24(rt, ELEMS);

    /* The name passed here matches the one given to report() below. */
    if (check_func(c->sum_square_butterfly_int32,
                   "ac3_sum_square_butterfly_int32")) {
        call_ref(v1, lt, rt, ELEMS);
        call_new(v2, lt, rt, ELEMS);

        /* All four sums must be bit-identical to the reference. */
        if (memcmp(v1, v2, sizeof(int64_t[4])) != 0)
            fail();

        bench_new(v2, lt, rt, ELEMS);
    }

    report("ac3_sum_square_butterfly_int32");
}
|
||||||
|
|
||||||
void checkasm_check_ac3dsp(void)
|
void checkasm_check_ac3dsp(void)
|
||||||
{
|
{
|
||||||
AC3DSPContext c;
|
AC3DSPContext c;
|
||||||
|
@ -147,4 +173,5 @@ void checkasm_check_ac3dsp(void)
|
||||||
check_ac3_exponent_min(&c);
|
check_ac3_exponent_min(&c);
|
||||||
check_ac3_extract_exponents(&c);
|
check_ac3_extract_exponents(&c);
|
||||||
check_float_to_fixed24(&c);
|
check_float_to_fixed24(&c);
|
||||||
|
check_ac3_sum_square_butterfly_int32(&c);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue