ffmpeg/libavcodec/aarch64
Mikhail Nitenko 0f745b74ec lavc/aarch64: h264qpel, add 10-bit lowpass_8_10 based functions
Benchmarks                         A53      A55     A72     A76
avg_h264_qpel_8_mc01_10_c:        936.5    924.0   656.0   504.7
avg_h264_qpel_8_mc01_10_neon:     234.7    202.0   120.7    63.2
avg_h264_qpel_8_mc02_10_c:        921.0    920.0   669.2   493.7
avg_h264_qpel_8_mc02_10_neon:     202.0    173.2   102.7    58.5
avg_h264_qpel_8_mc03_10_c:        936.5    924.0   656.0   509.5
avg_h264_qpel_8_mc03_10_neon:     236.2    203.7   120.0    63.2
avg_h264_qpel_8_mc10_10_c:       1441.0   1437.7   806.7   478.5
avg_h264_qpel_8_mc10_10_neon:     325.7    324.0   153.7    94.2
avg_h264_qpel_8_mc11_10_c:       2160.7   2148.2  1366.7   906.7
avg_h264_qpel_8_mc11_10_neon:     492.0    464.0   242.5   134.5
avg_h264_qpel_8_mc13_10_c:       2157.0   2138.2  1357.0   908.2
avg_h264_qpel_8_mc13_10_neon:     494.0    467.2   242.0   140.0
avg_h264_qpel_8_mc20_10_c:       1433.5   1410.0   785.2   486.0
avg_h264_qpel_8_mc20_10_neon:     293.7    289.7   138.0    91.5
avg_h264_qpel_8_mc30_10_c:       1458.5   1461.7   813.7   483.2
avg_h264_qpel_8_mc30_10_neon:     341.7    339.2   154.0    95.2
avg_h264_qpel_8_mc31_10_c:       2194.7   2197.2  1358.7   928.0
avg_h264_qpel_8_mc31_10_neon:     520.0    495.0   245.5   142.5
avg_h264_qpel_8_mc33_10_c:       2188.0   2205.5  1356.7   910.7
avg_h264_qpel_8_mc33_10_neon:     521.0    494.5   245.7   145.7
avg_h264_qpel_16_mc01_10_c:      3717.2   3595.0  2610.0  2012.0
avg_h264_qpel_16_mc01_10_neon:    920.5    791.5   483.2   240.5
avg_h264_qpel_16_mc02_10_c:      3684.0   3633.0  2659.0  1919.7
avg_h264_qpel_16_mc02_10_neon:    790.7    678.2   409.2   217.0
avg_h264_qpel_16_mc03_10_c:      3726.5   3596.0  2606.7  2010.0
avg_h264_qpel_16_mc03_10_neon:    922.0    792.5   483.2   239.7
avg_h264_qpel_16_mc10_10_c:      5912.0   5803.2  3241.5  1916.7
avg_h264_qpel_16_mc10_10_neon:   1267.5   1277.2   616.5   365.0
avg_h264_qpel_16_mc11_10_c:      8599.2   8482.5  5338.0  3616.2
avg_h264_qpel_16_mc11_10_neon:   1913.0   1827.0   956.2   542.2
avg_h264_qpel_16_mc13_10_c:      8643.7   8488.5  5388.0  3628.5
avg_h264_qpel_16_mc13_10_neon:   1914.7   1828.7   969.2   530.5
avg_h264_qpel_16_mc20_10_c:      5719.5   5641.0  3147.0  1946.2
avg_h264_qpel_16_mc20_10_neon:   1139.5   1150.0   539.5   344.0
avg_h264_qpel_16_mc30_10_c:      5930.0   5872.5  3267.5  1918.0
avg_h264_qpel_16_mc30_10_neon:   1331.5   1341.2   616.5   369.5
avg_h264_qpel_16_mc31_10_c:      8758.7   8697.7  5353.0  3630.7
avg_h264_qpel_16_mc31_10_neon:   2018.7   1941.7   982.2   574.7
avg_h264_qpel_16_mc33_10_c:      8683.2   8675.2  5339.2  3634.7
avg_h264_qpel_16_mc33_10_neon:   2019.7   1940.2   994.5   566.0
put_h264_qpel_8_mc01_10_c:        854.2    843.0   599.2   478.0
put_h264_qpel_8_mc01_10_neon:     192.7    168.0   101.7    56.7
put_h264_qpel_8_mc02_10_c:        766.5    760.0   550.2   441.0
put_h264_qpel_8_mc02_10_neon:     160.0    139.2    88.7    53.0
put_h264_qpel_8_mc03_10_c:        854.2    843.0   599.2   479.0
put_h264_qpel_8_mc03_10_neon:     194.2    169.7   102.0    56.2
put_h264_qpel_8_mc10_10_c:       1352.7   1353.7   749.7   446.7
put_h264_qpel_8_mc10_10_neon:     289.7    294.2   135.5    88.5
put_h264_qpel_8_mc11_10_c:       2080.0   2066.2  1309.5   876.7
put_h264_qpel_8_mc11_10_neon:     450.0    429.7   229.7   131.2
put_h264_qpel_8_mc13_10_c:       2074.7   2060.2  1294.5   870.5
put_h264_qpel_8_mc13_10_neon:     452.5    434.5   226.5   130.0
put_h264_qpel_8_mc20_10_c:       1221.5   1216.0   684.5   399.7
put_h264_qpel_8_mc20_10_neon:     257.7    262.5   121.2    78.7
put_h264_qpel_8_mc30_10_c:       1379.0   1374.7   757.2   449.5
put_h264_qpel_8_mc30_10_neon:     305.7    310.2   135.5    86.5
put_h264_qpel_8_mc31_10_c:       2109.2   2119.7  1299.5   878.0
put_h264_qpel_8_mc31_10_neon:     478.0    458.5   226.0   137.2
put_h264_qpel_8_mc33_10_c:       2101.5   2115.2  1306.5   887.0
put_h264_qpel_8_mc33_10_neon:     479.0    458.7   229.7   141.7
put_h264_qpel_16_mc01_10_c:      3485.7   3396.7  2460.5  1914.5
put_h264_qpel_16_mc01_10_neon:    752.5    665.5   397.0   213.2
put_h264_qpel_16_mc02_10_c:      3103.5   3023.2  2154.7  1720.7
put_h264_qpel_16_mc02_10_neon:    622.7    551.2   347.7   196.2
put_h264_qpel_16_mc03_10_c:      3486.2   3394.0  2436.5  1917.7
put_h264_qpel_16_mc03_10_neon:    754.0    666.5   397.0   215.7
put_h264_qpel_16_mc10_10_c:      5533.0   5488.5  2989.0  1783.0
put_h264_qpel_16_mc10_10_neon:   1123.5   1165.2   535.2   334.7
put_h264_qpel_16_mc11_10_c:      8437.7   8281.2  5209.0  3510.7
put_h264_qpel_16_mc11_10_neon:   1745.0   1697.0   878.5   513.5
put_h264_qpel_16_mc13_10_c:      8567.7   8468.0  5221.5  3528.0
put_h264_qpel_16_mc13_10_neon:   1751.7   1698.2   889.2   507.0
put_h264_qpel_16_mc20_10_c:      4907.5   4885.0  2786.2  1607.5
put_h264_qpel_16_mc20_10_neon:    995.5   1034.5   475.5   307.0
put_h264_qpel_16_mc30_10_c:      5579.7   5537.7  3045.2  1789.5
put_h264_qpel_16_mc30_10_neon:   1187.5   1231.2   532.5   334.5
put_h264_qpel_16_mc31_10_c:      8677.2   8672.5  5204.2  3516.0
put_h264_qpel_16_mc31_10_neon:   1850.7   1813.2   893.0   545.2
put_h264_qpel_16_mc33_10_c:      8688.7   8671.2  5223.2  3512.0
put_h264_qpel_16_mc33_10_neon:   1851.7   1814.2   908.5   535.2

Signed-off-by: Mikhail Nitenko <mnitenko@gmail.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
2023-12-07 23:20:14 +02:00
..
Makefile
aacpsdsp_init_aarch64.c
aacpsdsp_neon.S aarch64: Reindent all assembly to 8/24 column indentation 2023-10-21 23:25:54 +03:00
cabac.h
fmtconvert_init.c
fmtconvert_neon.S
h264chroma_init_aarch64.c
h264cmc_neon.S aarch64: Lowercase UXTW/SXTW and similar flags 2023-10-21 23:25:23 +03:00
h264dsp_init_aarch64.c
h264dsp_neon.S aarch64: Make the indentation more consistent 2023-10-21 23:25:29 +03:00
h264idct_neon.S aarch64: Lowercase UXTW/SXTW and similar flags 2023-10-21 23:25:23 +03:00
h264pred_init.c
h264pred_neon.S
h264qpel_init_aarch64.c lavc/aarch64: h264qpel, add 10-bit lowpass_8_10 based functions 2023-12-07 23:20:14 +02:00
h264qpel_neon.S lavc/aarch64: h264qpel, add 10-bit lowpass_8_10 based functions 2023-12-07 23:20:14 +02:00
hevcdsp_deblock_neon.S
hevcdsp_epel_neon.S lavc/aarch64: new optimization for 8-bit hevc_epel_bi_hv 2023-12-01 21:25:39 +02:00
hevcdsp_idct_neon.S aarch64: Make the indentation more consistent 2023-10-21 23:25:29 +03:00
hevcdsp_init_aarch64.c lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_hv 2023-12-01 21:25:39 +02:00
hevcdsp_qpel_neon.S lavc/aarch64: new optimization for 8-bit hevc_qpel_bi_hv 2023-12-01 21:25:39 +02:00
hevcdsp_sao_neon.S
hpeldsp_init_aarch64.c
hpeldsp_neon.S aarch64: Consistently use lowercase for vector element specifiers 2023-10-21 23:25:18 +03:00
idct.h
idctdsp_init_aarch64.c
idctdsp_neon.S
me_cmp_init_aarch64.c
me_cmp_neon.S aarch64: Consistently use lowercase for vector element specifiers 2023-10-21 23:25:18 +03:00
mpegaudiodsp_init.c
mpegaudiodsp_neon.S lavc/hevcdsp_qpel_neon: using movi.16b instead of movi.2d 2023-11-28 15:54:49 +02:00
neon.S aarch64: Consistently use lowercase for vector element specifiers 2023-10-21 23:25:18 +03:00
neontest.c
opusdsp_init.c
opusdsp_neon.S aarch64: Reindent all assembly to 8/24 column indentation 2023-10-21 23:25:54 +03:00
pixblockdsp_init_aarch64.c
pixblockdsp_neon.S
rv40dsp_init_aarch64.c
sbrdsp_init_aarch64.c
sbrdsp_neon.S aarch64: Consistently use lowercase for vector element specifiers 2023-10-21 23:25:18 +03:00
simple_idct_neon.S aarch64: Consistently use lowercase for vector element specifiers 2023-10-21 23:25:18 +03:00
synth_filter_init.c
synth_filter_neon.S
vc1dsp_init_aarch64.c
vc1dsp_neon.S
videodsp.S
videodsp_init.c
vorbisdsp_init.c
vorbisdsp_neon.S
vp8dsp.h
vp8dsp_init_aarch64.c
vp8dsp_neon.S aarch64: Make the indentation more consistent 2023-10-21 23:25:29 +03:00
vp9dsp_init.h
vp9dsp_init_10bpp_aarch64.c
vp9dsp_init_12bpp_aarch64.c
vp9dsp_init_16bpp_aarch64_template.c
vp9dsp_init_aarch64.c
vp9itxfm_16bpp_neon.S
vp9itxfm_neon.S
vp9lpf_16bpp_neon.S
vp9lpf_neon.S
vp9mc_16bpp_neon.S
vp9mc_aarch64.S
vp9mc_neon.S