ffmpeg/libavcodec/riscv
Niklas Haas 2f77ecc6bc avcodec/riscv: add h264 qpel
Benched on K230 for VLEN 128, SpaceMIT for VLEN 256. Variants for 4
width have no speedup for VLEN 256 vs VLEN 128 on available hardware,
so were disabled.

                        C      RVV128          C     RVV256
avg_h264_qpel_4_mc00_8  33.9   33.6   (1.01x)
avg_h264_qpel_4_mc01_8  218.8  89.1   (2.46x)
avg_h264_qpel_4_mc02_8  218.8  79.8   (2.74x)
avg_h264_qpel_4_mc03_8  218.8  89.1   (2.46x)
avg_h264_qpel_4_mc10_8  172.3  126.1  (1.37x)
avg_h264_qpel_4_mc11_8  339.1  190.8  (1.78x)
avg_h264_qpel_4_mc12_8  533.6  357.6  (1.49x)
avg_h264_qpel_4_mc13_8  348.4  190.8  (1.83x)
avg_h264_qpel_4_mc20_8  144.8  116.8  (1.24x)
avg_h264_qpel_4_mc21_8  478.1  385.6  (1.24x)
avg_h264_qpel_4_mc22_8  348.4  283.6  (1.23x)
avg_h264_qpel_4_mc23_8  478.1  394.6  (1.21x)
avg_h264_qpel_4_mc30_8  172.6  126.1  (1.37x)
avg_h264_qpel_4_mc31_8  339.4  191.1  (1.78x)
avg_h264_qpel_4_mc32_8  542.9  357.6  (1.52x)
avg_h264_qpel_4_mc33_8  339.4  191.1  (1.78x)
avg_h264_qpel_8_mc00_8  116.8  42.9   (2.72x)  123.6  50.6   (2.44x)
avg_h264_qpel_8_mc01_8  774.4  163.1  (4.75x)  779.8  165.1  (4.72x)
avg_h264_qpel_8_mc02_8  774.4  154.1  (5.03x)  779.8  144.3  (5.40x)
avg_h264_qpel_8_mc03_8  774.4  163.3  (4.74x)  779.8  165.3  (4.72x)
avg_h264_qpel_8_mc10_8  617.1  237.3  (2.60x)  613.1  227.6  (2.69x)
avg_h264_qpel_8_mc11_8  1209.3 376.4  (3.21x)  1206.8 363.1  (3.32x)
avg_h264_qpel_8_mc12_8  1913.3 598.6  (3.20x)  1894.3 561.1  (3.38x)
avg_h264_qpel_8_mc13_8  1218.6 376.4  (3.24x)  1217.1 363.1  (3.35x)
avg_h264_qpel_8_mc20_8  524.4  228.1  (2.30x)  519.3  227.6  (2.28x)
avg_h264_qpel_8_mc21_8  1709.6 681.9  (2.51x)  1707.1 644.3  (2.65x)
avg_h264_qpel_8_mc22_8  1274.3 459.6  (2.77x)  1279.8 436.1  (2.93x)
avg_h264_qpel_8_mc23_8  1700.3 672.6  (2.53x)  1706.8 644.6  (2.65x)
avg_h264_qpel_8_mc30_8  607.6  246.6  (2.46x)  623.6  238.1  (2.62x)
avg_h264_qpel_8_mc31_8  1209.6 376.4  (3.21x)  1206.8 363.1  (3.32x)
avg_h264_qpel_8_mc32_8  1904.1 607.9  (3.13x)  1894.3 571.3  (3.32x)
avg_h264_qpel_8_mc33_8  1209.6 376.1  (3.22x)  1206.8 363.1  (3.32x)
avg_h264_qpel_16_mc00_8 431.9  89.1   (4.85x)  436.1  71.3   (6.12x)
avg_h264_qpel_16_mc01_8 2894.6 376.1  (7.70x)  2842.3 300.6  (9.46x)
avg_h264_qpel_16_mc02_8 2987.3 348.4  (8.57x)  2967.3 290.1  (10.23x)
avg_h264_qpel_16_mc03_8 2885.3 376.4  (7.67x)  2842.3 300.6  (9.46x)
avg_h264_qpel_16_mc10_8 2404.1 524.4  (4.58x)  2404.8 456.8  (5.26x)
avg_h264_qpel_16_mc11_8 4709.4 811.6  (5.80x)  4675.6 706.8  (6.62x)
avg_h264_qpel_16_mc12_8 7477.9 1274.3 (5.87x)  7436.1 1061.1 (7.01x)
avg_h264_qpel_16_mc13_8 4718.6 820.6  (5.75x)  4655.1 706.8  (6.59x)
avg_h264_qpel_16_mc20_8 2052.1 487.1  (4.21x)  2071.3 446.3  (4.64x)
avg_h264_qpel_16_mc21_8 7440.6 1422.6 (5.23x)  6727.8 1217.3 (5.53x)
avg_h264_qpel_16_mc22_8 5051.9 950.4  (5.32x)  5071.6 790.3  (6.42x)
avg_h264_qpel_16_mc23_8 6764.9 1422.3 (4.76x)  6748.6 1217.3 (5.54x)
avg_h264_qpel_16_mc30_8 2413.1 524.4  (4.60x)  2415.1 467.3  (5.17x)
avg_h264_qpel_16_mc31_8 4681.6 839.1  (5.58x)  4675.6 727.6  (6.43x)
avg_h264_qpel_16_mc32_8 8579.6 1292.8 (6.64x)  7436.3 1071.3 (6.94x)
avg_h264_qpel_16_mc33_8 5375.9 829.9  (6.48x)  4665.3 717.3  (6.50x)
put_h264_qpel_4_mc00_8  24.4   24.4   (1.00x)
put_h264_qpel_4_mc01_8  987.4  79.8   (12.37x)
put_h264_qpel_4_mc02_8  190.8  79.8   (2.39x)
put_h264_qpel_4_mc03_8  209.6  89.1   (2.35x)
put_h264_qpel_4_mc10_8  163.3  117.1  (1.39x)
put_h264_qpel_4_mc11_8  339.4  181.6  (1.87x)
put_h264_qpel_4_mc12_8  533.6  348.4  (1.53x)
put_h264_qpel_4_mc13_8  339.4  190.8  (1.78x)
put_h264_qpel_4_mc20_8  126.3  116.8  (1.08x)
put_h264_qpel_4_mc21_8  468.9  376.1  (1.25x)
put_h264_qpel_4_mc22_8  330.1  274.4  (1.20x)
put_h264_qpel_4_mc23_8  468.9  376.1  (1.25x)
put_h264_qpel_4_mc30_8  163.3  126.3  (1.29x)
put_h264_qpel_4_mc31_8  339.1  191.1  (1.77x)
put_h264_qpel_4_mc32_8  533.6  348.4  (1.53x)
put_h264_qpel_4_mc33_8  339.4  181.8  (1.87x)
put_h264_qpel_8_mc00_8  98.6   33.6   (2.93x)  92.3   40.1   (2.30x)
put_h264_qpel_8_mc01_8  737.1  153.8  (4.79x)  738.1  144.3  (5.12x)
put_h264_qpel_8_mc02_8  663.1  135.3  (4.90x)  665.1  134.1  (4.96x)
put_h264_qpel_8_mc03_8  737.4  154.1  (4.79x)  1508.8 144.3  (10.46x)
put_h264_qpel_8_mc10_8  598.4  237.1  (2.52x)  592.3  227.6  (2.60x)
put_h264_qpel_8_mc11_8  1172.3 357.9  (3.28x)  1175.6 342.3  (3.43x)
put_h264_qpel_8_mc12_8  1867.1 589.1  (3.17x)  1863.1 561.1  (3.32x)
put_h264_qpel_8_mc13_8  1172.6 366.9  (3.20x)  1175.6 352.8  (3.33x)
put_h264_qpel_8_mc20_8  450.4  218.8  (2.06x)  446.3  206.8  (2.16x)
put_h264_qpel_8_mc21_8  1672.3 663.1  (2.52x)  1675.6 633.8  (2.64x)
put_h264_qpel_8_mc22_8  1144.6 1200.1 (0.95x)  1144.3 425.6  (2.69x)
put_h264_qpel_8_mc23_8  1672.6 672.4  (2.49x)  1665.3 634.1  (2.63x)
put_h264_qpel_8_mc30_8  598.6  237.3  (2.52x)  613.1  227.6  (2.69x)
put_h264_qpel_8_mc31_8  1172.3 376.1  (3.12x)  1175.6 352.6  (3.33x)
put_h264_qpel_8_mc32_8  1857.8 598.6  (3.10x)  1863.1 561.1  (3.32x)
put_h264_qpel_8_mc33_8  1172.3 376.1  (3.12x)  1175.6 352.8  (3.33x)
put_h264_qpel_16_mc00_8 320.6  61.4   (5.22x)  321.3  60.8   (5.28x)
put_h264_qpel_16_mc01_8 2774.3 339.1  (8.18x)  2759.1 279.8  (9.86x)
put_h264_qpel_16_mc02_8 2589.1 320.6  (8.08x)  2571.6 269.3  (9.55x)
put_h264_qpel_16_mc03_8 2774.3 339.4  (8.17x)  2738.1 290.1  (9.44x)
put_h264_qpel_16_mc10_8 2274.3 487.4  (4.67x)  2290.1 436.1  (5.25x)
put_h264_qpel_16_mc11_8 5237.1 792.9  (6.60x)  4529.8 685.8  (6.61x)
put_h264_qpel_16_mc12_8 7357.6 1255.8 (5.86x)  7352.8 1040.1 (7.07x)
put_h264_qpel_16_mc13_8 4579.9 792.9  (5.78x)  4571.6 686.1  (6.66x)
put_h264_qpel_16_mc20_8 1802.1 459.6  (3.92x)  1800.6 425.6  (4.23x)
put_h264_qpel_16_mc21_8 6644.6 2246.6 (2.96x)  6644.3 1196.6 (5.55x)
put_h264_qpel_16_mc22_8 4589.1 913.4  (5.02x)  4592.3 769.3  (5.97x)
put_h264_qpel_16_mc23_8 6644.6 1394.6 (4.76x)  6634.1 1196.6 (5.54x)
put_h264_qpel_16_mc30_8 2274.3 496.6  (4.58x)  2290.1 456.8  (5.01x)
put_h264_qpel_16_mc31_8 5255.6 802.1  (6.55x)  4550.8 706.8  (6.44x)
put_h264_qpel_16_mc32_8 7376.1 1265.1 (5.83x)  7352.8 1050.6 (7.00x)
put_h264_qpel_16_mc33_8 4579.9 802.1  (5.71x)  4561.1 696.3  (6.55x)

Signed-off-by: Niklas Haas <git@haasn.dev>
Signed-off-by: J. Dekker <jdek@itanimul.li>
2024-09-28 18:35:35 +02:00
..
vvc lavc/vvc_mc: R-V V avg w_avg 2024-09-24 20:04:51 +03:00
Makefile avcodec/riscv: add h264 qpel 2024-09-28 18:35:35 +02:00
aacencdsp_init.c
aacencdsp_rvv.S
aacpsdsp_init.c
aacpsdsp_rvv.S
ac3dsp_init.c
ac3dsp_rvb.S
ac3dsp_rvv.S
ac3dsp_rvvb.S
alacdsp_init.c
alacdsp_rvv.S
audiodsp_init.c
audiodsp_rvv.S
blockdsp_init.c
blockdsp_rvv.S
bswapdsp_init.c
bswapdsp_rvb.S
bswapdsp_rvv.S
bswapdsp_rvvb.S
cpu_common.c
exrdsp_init.c
exrdsp_rvv.S
fixed_vtype.S
flacdsp_init.c
flacdsp_rvv.S
fmtconvert_init.c
fmtconvert_rvv.S
g722dsp_init.c
g722dsp_rvv.S
h263dsp_init.c
h263dsp_rvv.S
h264_chroma_init_riscv.c
h264_mc_chroma.S
h264addpx_rvv.S
h264dsp_init.c lavc/h264dsp: optimise R-V V biweight for shorter heights 2024-09-24 20:04:51 +03:00
h264dsp_rvv.S lavc/h264dsp: optimise R-V V biweight for shorter heights 2024-09-24 20:04:51 +03:00
h264idct_rvv.S
h264qpel_init.c avcodec/riscv: add h264 qpel 2024-09-28 18:35:35 +02:00
h264qpel_rvv.S avcodec/riscv: add h264 qpel 2024-09-28 18:35:35 +02:00
huffyuvdsp_init.c
huffyuvdsp_rvv.S
idctdsp_init.c
idctdsp_rvv.S
jpeg2000dsp_init.c
jpeg2000dsp_rvv.S
llauddsp_init.c
llauddsp_rvv.S
llviddsp_init.c
llviddsp_rvv.S
llvidencdsp_init.c
llvidencdsp_rvv.S
lpc_init.c
lpc_rvv.S
me_cmp_init.c
me_cmp_rvv.S
mpegvideoencdsp_init.c
mpegvideoencdsp_rvv.S
opusdsp_init.c
opusdsp_rvv.S
pixblockdsp_init.c
pixblockdsp_rvi.S
pixblockdsp_rvv.S
rv34dsp_init.c
rv34dsp_rvv.S
rv40dsp_init.c
rv40dsp_rvv.S
sbrdsp_init.c
sbrdsp_rvv.S
startcode_rvb.S
startcode_rvv.S
svqenc_init.c
svqenc_rvv.S
takdsp_init.c
takdsp_rvv.S
utvideodsp_init.c
utvideodsp_rvv.S
vc1dsp_init.c
vc1dsp_rvi.S
vc1dsp_rvv.S
videodsp.S
videodsp_init.c
vorbisdsp_init.c
vorbisdsp_rvv.S
vp7dsp_init.c
vp7dsp_rvv.S
vp8dsp.h
vp8dsp_init.c
vp8dsp_rvi.S
vp8dsp_rvv.S
vp9_intra_rvv.S
vp9_mc_rvi.S
vp9_mc_rvv.S
vp9dsp.h
vp9dsp_init.c