mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-04 22:30:25 +00:00
7e42d5f0ab
The previous version was a pretty exact translation of the arm version. This version does some unnecessary arithmetic (it does more operations on vectors that are only half filled; it does 4 uaddw and 4 sqxtun instead of 2 of each), but it reduces the overhead of packing data together (which could be done for free in the arm version). This gives a decent speedup on Cortex A53, a minor speedup on A72 and a very minor slowdown on Cortex A73. Before (Cortex A53 / A72 / A73): vp8_idct_add_neon: 79.7 / 67.5 / 65.0. After (Cortex A53 / A72 / A73): vp8_idct_add_neon: 67.7 / 64.8 / 66.7. Signed-off-by: Martin Storsjö <martin@martin.st> |
||
---|---|---|
.. | ||
asm-offsets.h | ||
cabac.h | ||
dcadsp_init.c | ||
dcadsp_neon.S | ||
fft_init_aarch64.c | ||
fft_neon.S | ||
fmtconvert_init.c | ||
fmtconvert_neon.S | ||
h264chroma_init_aarch64.c | ||
h264cmc_neon.S | ||
h264dsp_init_aarch64.c | ||
h264dsp_neon.S | ||
h264idct_neon.S | ||
h264pred_init.c | ||
h264pred_neon.S | ||
h264qpel_init_aarch64.c | ||
h264qpel_neon.S | ||
hpeldsp_init_aarch64.c | ||
hpeldsp_neon.S | ||
imdct15_init.c | ||
imdct15_neon.S | ||
Makefile | ||
mdct_init.c | ||
mdct_neon.S | ||
mpegaudiodsp_init.c | ||
mpegaudiodsp_neon.S | ||
neon.S | ||
neontest.c | ||
rv40dsp_init_aarch64.c | ||
synth_filter_neon.S | ||
vc1dsp_init_aarch64.c | ||
videodsp_init.c | ||
videodsp.S | ||
vorbisdsp_init.c | ||
vorbisdsp_neon.S | ||
vp8dsp_init_aarch64.c | ||
vp8dsp_neon.S | ||
vp8dsp.h | ||
vp9dsp_init_aarch64.c | ||
vp9itxfm_neon.S | ||
vp9lpf_neon.S | ||
vp9mc_neon.S |