ffmpeg/libavcodec/x86/vvc/vvcdsp_init.c

257 lines
10 KiB
C
Raw Normal View History

/*
* VVC DSP init for x86
*
* Copyright (C) 2022-2024 Nuo Mi
* Copyright (c) 2023-2024 Wu Jianhua
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/vvc/vvcdec.h"
#include "libavcodec/vvc/vvc_ctu.h"
#include "libavcodec/vvc/vvcdsp.h"
#include "libavcodec/x86/h26x/h2656dsp.h"
#if ARCH_X86_64
/*
 * Define the static wrapper ff_vvc_put_<filter>_<bits>_<isa>().
 *
 * The wrapper forwards all of its arguments, in the same order, to
 * ff_h2656_put_<filter>_<bits>_<isa>() and inserts the constant
 * 2 * MAX_PB_SIZE as that function's second argument.
 */
#define FW_PUT(filter, bits, isa)                                              \
static void ff_vvc_put_ ## filter ## _ ## bits ## _ ## isa(int16_t *dst,       \
    const uint8_t *src, ptrdiff_t srcstride, int height,                       \
    const int8_t *hf, const int8_t *vf, int width)                             \
{                                                                              \
    ff_h2656_put_ ## filter ## _ ## bits ## _ ## isa(dst, 2 * MAX_PB_SIZE,     \
                                                     src, srcstride, height,   \
                                                     hf, vf, width);           \
}
/*
 * Invoke FW_PUT once for each of the size suffixes 4, 8, 16, 32, 64 and 128
 * appended to the filter family name fname, for one bit depth (bitd) and one
 * instruction-set suffix (opt).
 *
 * The final line must not end in a line-continuation backslash: a dangling
 * '\' splices the following source line into this macro's replacement list.
 */
#define FW_PUT_TAP(fname, bitd, opt ) \
    FW_PUT(fname##4,   bitd, opt ); \
    FW_PUT(fname##8,   bitd, opt ); \
    FW_PUT(fname##16,  bitd, opt ); \
    FW_PUT(fname##32,  bitd, opt ); \
    FW_PUT(fname##64,  bitd, opt ); \
    FW_PUT(fname##128, bitd, opt );
/*
 * Invoke FW_PUT for the family name with the suffix 2 appended, then
 * FW_PUT_TAP for the same family, bit depth and instruction-set suffix.
 */
#define FW_PUT_4TAP(family, bits, isa) \
    FW_PUT(family ## 2, bits, isa)     \
    FW_PUT_TAP(family, bits, isa)
/* FW_PUT_4TAP for the pixels, 4tap_h, 4tap_v and 4tap_hv families (sse4). */
#define FW_PUT_4TAP_SSE4(bits)       \
    FW_PUT_4TAP(pixels,  bits, sse4) \
    FW_PUT_4TAP(4tap_h,  bits, sse4) \
    FW_PUT_4TAP(4tap_v,  bits, sse4) \
    FW_PUT_4TAP(4tap_hv, bits, sse4)

/* FW_PUT_TAP for the 8tap_h, 8tap_v and 8tap_hv families (sse4). */
#define FW_PUT_8TAP_SSE4(bits)       \
    FW_PUT_TAP(8tap_h,  bits, sse4)  \
    FW_PUT_TAP(8tap_v,  bits, sse4)  \
    FW_PUT_TAP(8tap_hv, bits, sse4)

/* Both groups above for a single bit depth. */
#define FW_PUT_SSE4(bits)   \
    FW_PUT_4TAP_SSE4(bits)  \
    FW_PUT_8TAP_SSE4(bits)

/* Expand the sse4 wrappers for bit depths 8, 10 and 12. */
FW_PUT_SSE4(8);
FW_PUT_SSE4(10);
FW_PUT_SSE4(12);
/*
 * Invoke FW_PUT with the avx2 suffix for <taps>tap_h and <taps>tap_v at the
 * size suffixes 32, 64 and 128 (six invocations, all h first, then all v).
 */
#define FW_PUT_TAP_AVX2(taps, bits)        \
    FW_PUT(taps ## tap_h32,  bits, avx2)   \
    FW_PUT(taps ## tap_h64,  bits, avx2)   \
    FW_PUT(taps ## tap_h128, bits, avx2)   \
    FW_PUT(taps ## tap_v32,  bits, avx2)   \
    FW_PUT(taps ## tap_v64,  bits, avx2)   \
    FW_PUT(taps ## tap_v128, bits, avx2)
/*
 * Invoke FW_PUT with the avx2 suffix for pixels32, pixels64 and pixels128,
 * followed by FW_PUT_TAP_AVX2 for 4 and for 8, all at bit depth bitd.
 *
 * The final line must not end in a line-continuation backslash: a dangling
 * '\' splices the following source line into this macro's replacement list,
 * which removes that line from the translation unit as a statement of its own.
 */
#define FW_PUT_AVX2(bitd) \
    FW_PUT(pixels32,  bitd, avx2) \
    FW_PUT(pixels64,  bitd, avx2) \
    FW_PUT(pixels128, bitd, avx2) \
    FW_PUT_TAP_AVX2(4, bitd) \
    FW_PUT_TAP_AVX2(8, bitd)
/* Expand FW_PUT_AVX2 once for each of the bit depths 8, 10 and 12. */
FW_PUT_AVX2( 8)
FW_PUT_AVX2(10)
FW_PUT_AVX2(12)
/*
 * Invoke FW_PUT with the avx2 suffix for <taps>tap_h16, <taps>tap_v16 and
 * <taps>tap_hv at the size suffixes 16, 32, 64 and 128.
 */
#define FW_PUT_TAP_16BPC_AVX2(taps, bits)   \
    FW_PUT(taps ## tap_h16,   bits, avx2)   \
    FW_PUT(taps ## tap_v16,   bits, avx2)   \
    FW_PUT(taps ## tap_hv16,  bits, avx2)   \
    FW_PUT(taps ## tap_hv32,  bits, avx2)   \
    FW_PUT(taps ## tap_hv64,  bits, avx2)   \
    FW_PUT(taps ## tap_hv128, bits, avx2)
/*
 * Invoke FW_PUT for pixels16 (avx2), then FW_PUT_TAP_16BPC_AVX2 for 4 and
 * for 8, all at one bit depth.  The replacement list itself ends in ';'.
 */
#define FW_PUT_16BPC_AVX2(bits)        \
    FW_PUT(pixels16, bits, avx2)       \
    FW_PUT_TAP_16BPC_AVX2(4, bits)     \
    FW_PUT_TAP_16BPC_AVX2(8, bits);

/* Expanded for bit depths 10 and 12 only. */
FW_PUT_16BPC_AVX2(10);
FW_PUT_16BPC_AVX2(12);
/*
 * Store two function names into two parallel four-dimensional tables:
 *
 *   dst[C][W][idx1][idx2]      = ff_vvc_put_<name>_<D>_<opt>
 *   dst_uni[C][W][idx1][idx2]  = ff_h2656_put_uni_<name>_<D>_<opt>
 *
 * where "dst_uni" is the dst argument with "_uni" pasted onto its last token.
 * Each assignment carries its own ';', so invocations may be written back to
 * back without separators (MC_TAP_LINKS_AVX2 in this file does so).
 *
 * The final line must not end in a line-continuation backslash: a dangling
 * '\' splices the following source line into this macro's replacement list.
 */
#define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
    dst[C][W][idx1][idx2] = ff_vvc_put_## name ## _ ## D ## _##opt; \
    dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt;
/*
 * Invoke PEL_LINK six times for one filter family: second table index 1..6
 * is paired with the family name suffixed by 4, 8, 16, 32, 64 and 128
 * respectively.  C, my and mx are passed through as the remaining indices.
 */
#define MC_TAP_LINKS(tbl, C, my, mx, family, bits, isa)     \
    PEL_LINK(tbl, C, 1, my, mx, family ## 4,   bits, isa);  \
    PEL_LINK(tbl, C, 2, my, mx, family ## 8,   bits, isa);  \
    PEL_LINK(tbl, C, 3, my, mx, family ## 16,  bits, isa);  \
    PEL_LINK(tbl, C, 4, my, mx, family ## 32,  bits, isa);  \
    PEL_LINK(tbl, C, 5, my, mx, family ## 64,  bits, isa);  \
    PEL_LINK(tbl, C, 6, my, mx, family ## 128, bits, isa);
/* MC_TAP_LINKS with the first table index fixed to LUMA. */
#define MC_8TAP_LINKS(tbl, my, mx, family, bits, isa) \
    MC_TAP_LINKS(tbl, LUMA, my, mx, family, bits, isa)

/*
 * Link the sse4 functions into c->inter.put for the four (my, mx) pairs:
 * (0,0) pixels, (0,1) 8tap_h, (1,0) 8tap_v, (1,1) 8tap_hv.
 * Expects an identifier `c` to be in scope at the point of use; the last
 * invocation is left without a trailing ';'.
 */
#define MC_8TAP_LINKS_SSE4(bits)                             \
    MC_8TAP_LINKS(c->inter.put, 0, 0, pixels,  bits, sse4);  \
    MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h,  bits, sse4);  \
    MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v,  bits, sse4);  \
    MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bits, sse4)
/*
 * Link one filter family with the first table index fixed to CHROMA:
 * PEL_LINK fills second index 0 with the family name suffixed by 2, then
 * MC_TAP_LINKS is invoked for the same family and (my, mx) pair.
 *
 * The final line must not end in a line-continuation backslash: a dangling
 * '\' splices the following source line into this macro's replacement list.
 */
#define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
    PEL_LINK(pointer, CHROMA, 0, my , mx , fname##2 , bitd, opt ); \
    MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt)
/*
 * Link the sse4 functions into c->inter.put via MC_4TAP_LINKS for the four
 * (my, mx) pairs: (0,0) pixels, (0,1) 4tap_h, (1,0) 4tap_v, (1,1) 4tap_hv.
 * Expects an identifier `c` to be in scope at the point of use.
 */
#define MC_4TAP_LINKS_SSE4(bits)                             \
    MC_4TAP_LINKS(c->inter.put, 0, 0, pixels,  bits, sse4);  \
    MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h,  bits, sse4);  \
    MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v,  bits, sse4);  \
    MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bits, sse4)

/*
 * The 4-tap links followed by the 8-tap links for one bit depth.  Nothing is
 * written between the two invocations, so the first expansion has to end in
 * a complete statement on its own.
 */
#define MC_LINK_SSE4(bits)     \
    MC_4TAP_LINKS_SSE4(bits)   \
    MC_8TAP_LINKS_SSE4(bits)
/*
 * Link the avx2 functions into c->inter.put for first index C, at second
 * indices 4, 5 and 6 (paired with the size suffixes 32, 64 and 128):
 *   [0][0] pixels<size>, [0][1] <n>tap_h<size>, [1][0] <n>tap_v<size>.
 * The PEL_LINK invocations are written without separators and wrapped in a
 * single do/while(0) statement.  Expects `c` to be in scope.
 */
#define MC_TAP_LINKS_AVX2(C, n, bits)                                  \
    do {                                                               \
        PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32,       bits, avx2) \
        PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64,       bits, avx2) \
        PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128,      bits, avx2) \
        PEL_LINK(c->inter.put, C, 4, 0, 1, n ## tap_h32,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 5, 0, 1, n ## tap_h64,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 6, 0, 1, n ## tap_h128,  bits, avx2) \
        PEL_LINK(c->inter.put, C, 4, 1, 0, n ## tap_v32,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 5, 1, 0, n ## tap_v64,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 6, 1, 0, n ## tap_v128,  bits, avx2) \
    } while (0)

/*
 * LUMA with the 8-tap names, then CHROMA with the 4-tap names.  The
 * replacement list is two statements and already ends in ';'.
 */
#define MC_LINKS_AVX2(bits)                \
    MC_TAP_LINKS_AVX2(LUMA,   8, bits);    \
    MC_TAP_LINKS_AVX2(CHROMA, 4, bits);
/*
 * Link the avx2 functions into c->inter.put for first index C:
 *   second index 3: [0][0] pixels16, [0][1] <n>tap_h16, [1][0] <n>tap_v16,
 *                   [1][1] <n>tap_hv16
 *   second index 4, 5, 6: [1][1] <n>tap_hv32 / hv64 / hv128
 * The PEL_LINK invocations are written without separators and wrapped in a
 * single do/while(0) statement.  Expects `c` to be in scope.
 */
#define MC_TAP_LINKS_16BPC_AVX2(C, n, bits)                            \
    do {                                                               \
        PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16,       bits, avx2) \
        PEL_LINK(c->inter.put, C, 3, 0, 1, n ## tap_h16,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 3, 1, 0, n ## tap_v16,   bits, avx2) \
        PEL_LINK(c->inter.put, C, 3, 1, 1, n ## tap_hv16,  bits, avx2) \
        PEL_LINK(c->inter.put, C, 4, 1, 1, n ## tap_hv32,  bits, avx2) \
        PEL_LINK(c->inter.put, C, 5, 1, 1, n ## tap_hv64,  bits, avx2) \
        PEL_LINK(c->inter.put, C, 6, 1, 1, n ## tap_hv128, bits, avx2) \
    } while (0)

/*
 * LUMA with the 8-tap names, then CHROMA with the 4-tap names.  The
 * replacement list is two statements and already ends in ';'.
 */
#define MC_LINKS_16BPC_AVX2(bits)                \
    MC_TAP_LINKS_16BPC_AVX2(LUMA,   8, bits);    \
    MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bits);
avcodec/x86/vvc: add avg and avg_w AVX2 optimizations The avg/avg_w is based on dav1d. See https://code.videolan.org/videolan/dav1d/-/blob/master/src/x86/mc_avx2.asm vvc_avg_8_2x2_c: 71.6 vvc_avg_8_2x2_avx2: 26.8 vvc_avg_8_2x4_c: 140.8 vvc_avg_8_2x4_avx2: 34.6 vvc_avg_8_2x8_c: 410.3 vvc_avg_8_2x8_avx2: 41.3 vvc_avg_8_2x16_c: 769.3 vvc_avg_8_2x16_avx2: 60.3 vvc_avg_8_2x32_c: 1669.6 vvc_avg_8_2x32_avx2: 105.1 vvc_avg_8_2x64_c: 1978.3 vvc_avg_8_2x64_avx2: 425.8 vvc_avg_8_2x128_c: 6536.8 vvc_avg_8_2x128_avx2: 1315.1 vvc_avg_8_4x2_c: 155.6 vvc_avg_8_4x2_avx2: 26.1 vvc_avg_8_4x4_c: 250.3 vvc_avg_8_4x4_avx2: 31.3 vvc_avg_8_4x8_c: 831.8 vvc_avg_8_4x8_avx2: 41.3 vvc_avg_8_4x16_c: 1461.1 vvc_avg_8_4x16_avx2: 57.1 vvc_avg_8_4x32_c: 2821.6 vvc_avg_8_4x32_avx2: 105.1 vvc_avg_8_4x64_c: 3615.8 vvc_avg_8_4x64_avx2: 412.6 vvc_avg_8_4x128_c: 11962.6 vvc_avg_8_4x128_avx2: 1274.3 vvc_avg_8_8x2_c: 215.8 vvc_avg_8_8x2_avx2: 29.1 vvc_avg_8_8x4_c: 430.6 vvc_avg_8_8x4_avx2: 37.6 vvc_avg_8_8x8_c: 1463.3 vvc_avg_8_8x8_avx2: 51.8 vvc_avg_8_8x16_c: 2630.1 vvc_avg_8_8x16_avx2: 97.6 vvc_avg_8_8x32_c: 5813.8 vvc_avg_8_8x32_avx2: 196.6 vvc_avg_8_8x64_c: 6687.3 vvc_avg_8_8x64_avx2: 487.8 vvc_avg_8_8x128_c: 13178.6 vvc_avg_8_8x128_avx2: 1290.6 vvc_avg_8_16x2_c: 443.8 vvc_avg_8_16x2_avx2: 28.3 vvc_avg_8_16x4_c: 1253.3 vvc_avg_8_16x4_avx2: 32.1 vvc_avg_8_16x8_c: 2236.3 vvc_avg_8_16x8_avx2: 44.3 vvc_avg_8_16x16_c: 5127.8 vvc_avg_8_16x16_avx2: 63.3 vvc_avg_8_16x32_c: 6573.3 vvc_avg_8_16x32_avx2: 223.6 vvc_avg_8_16x64_c: 30311.8 vvc_avg_8_16x64_avx2: 437.8 vvc_avg_8_16x128_c: 25693.3 vvc_avg_8_16x128_avx2: 1266.8 vvc_avg_8_32x2_c: 954.6 vvc_avg_8_32x2_avx2: 32.1 vvc_avg_8_32x4_c: 2359.6 vvc_avg_8_32x4_avx2: 39.6 vvc_avg_8_32x8_c: 5703.6 vvc_avg_8_32x8_avx2: 57.1 vvc_avg_8_32x16_c: 9967.6 vvc_avg_8_32x16_avx2: 107.1 vvc_avg_8_32x32_c: 21327.6 vvc_avg_8_32x32_avx2: 272.6 vvc_avg_8_32x64_c: 39240.8 vvc_avg_8_32x64_avx2: 529.6 vvc_avg_8_32x128_c: 52580.8 vvc_avg_8_32x128_avx2: 1338.8 vvc_avg_8_64x2_c: 1647.3 
vvc_avg_8_64x2_avx2: 38.8 vvc_avg_8_64x4_c: 5130.1 vvc_avg_8_64x4_avx2: 58.8 vvc_avg_8_64x8_c: 6529.3 vvc_avg_8_64x8_avx2: 88.3 vvc_avg_8_64x16_c: 19913.6 vvc_avg_8_64x16_avx2: 162.3 vvc_avg_8_64x32_c: 39360.8 vvc_avg_8_64x32_avx2: 295.8 vvc_avg_8_64x64_c: 49658.3 vvc_avg_8_64x64_avx2: 784.1 vvc_avg_8_64x128_c: 108513.1 vvc_avg_8_64x128_avx2: 1977.1 vvc_avg_8_128x2_c: 3226.1 vvc_avg_8_128x2_avx2: 61.1 vvc_avg_8_128x4_c: 10280.3 vvc_avg_8_128x4_avx2: 94.6 vvc_avg_8_128x8_c: 18079.3 vvc_avg_8_128x8_avx2: 155.3 vvc_avg_8_128x16_c: 45121.8 vvc_avg_8_128x16_avx2: 285.3 vvc_avg_8_128x32_c: 48651.8 vvc_avg_8_128x32_avx2: 581.6 vvc_avg_8_128x64_c: 165078.6 vvc_avg_8_128x64_avx2: 1942.8 vvc_avg_8_128x128_c: 339103.1 vvc_avg_8_128x128_avx2: 4332.6 vvc_avg_10_2x2_c: 144.3 vvc_avg_10_2x2_avx2: 26.8 vvc_avg_10_2x4_c: 142.6 vvc_avg_10_2x4_avx2: 45.3 vvc_avg_10_2x8_c: 478.1 vvc_avg_10_2x8_avx2: 38.1 vvc_avg_10_2x16_c: 518.3 vvc_avg_10_2x16_avx2: 58.1 vvc_avg_10_2x32_c: 2059.8 vvc_avg_10_2x32_avx2: 93.1 vvc_avg_10_2x64_c: 2383.8 vvc_avg_10_2x64_avx2: 714.8 vvc_avg_10_2x128_c: 4498.3 vvc_avg_10_2x128_avx2: 1466.3 vvc_avg_10_4x2_c: 228.6 vvc_avg_10_4x2_avx2: 26.8 vvc_avg_10_4x4_c: 378.3 vvc_avg_10_4x4_avx2: 30.6 vvc_avg_10_4x8_c: 866.8 vvc_avg_10_4x8_avx2: 44.6 vvc_avg_10_4x16_c: 1018.1 vvc_avg_10_4x16_avx2: 58.1 vvc_avg_10_4x32_c: 3590.8 vvc_avg_10_4x32_avx2: 128.8 vvc_avg_10_4x64_c: 4200.8 vvc_avg_10_4x64_avx2: 663.6 vvc_avg_10_4x128_c: 8450.8 vvc_avg_10_4x128_avx2: 1531.8 vvc_avg_10_8x2_c: 369.3 vvc_avg_10_8x2_avx2: 28.3 vvc_avg_10_8x4_c: 513.8 vvc_avg_10_8x4_avx2: 32.1 vvc_avg_10_8x8_c: 1720.3 vvc_avg_10_8x8_avx2: 49.1 vvc_avg_10_8x16_c: 1894.8 vvc_avg_10_8x16_avx2: 71.6 vvc_avg_10_8x32_c: 3931.3 vvc_avg_10_8x32_avx2: 148.1 vvc_avg_10_8x64_c: 7964.3 vvc_avg_10_8x64_avx2: 613.1 vvc_avg_10_8x128_c: 15540.1 vvc_avg_10_8x128_avx2: 1585.1 vvc_avg_10_16x2_c: 877.3 vvc_avg_10_16x2_avx2: 27.6 vvc_avg_10_16x4_c: 955.8 vvc_avg_10_16x4_avx2: 29.8 vvc_avg_10_16x8_c: 3419.6 
vvc_avg_10_16x8_avx2: 62.6 vvc_avg_10_16x16_c: 3826.8 vvc_avg_10_16x16_avx2: 54.3 vvc_avg_10_16x32_c: 7655.3 vvc_avg_10_16x32_avx2: 86.3 vvc_avg_10_16x64_c: 30011.1 vvc_avg_10_16x64_avx2: 692.6 vvc_avg_10_16x128_c: 47894.8 vvc_avg_10_16x128_avx2: 1580.3 vvc_avg_10_32x2_c: 944.3 vvc_avg_10_32x2_avx2: 29.8 vvc_avg_10_32x4_c: 2022.6 vvc_avg_10_32x4_avx2: 35.1 vvc_avg_10_32x8_c: 6148.8 vvc_avg_10_32x8_avx2: 51.3 vvc_avg_10_32x16_c: 12601.6 vvc_avg_10_32x16_avx2: 70.8 vvc_avg_10_32x32_c: 15958.6 vvc_avg_10_32x32_avx2: 124.3 vvc_avg_10_32x64_c: 31784.6 vvc_avg_10_32x64_avx2: 757.3 vvc_avg_10_32x128_c: 63892.8 vvc_avg_10_32x128_avx2: 1711.3 vvc_avg_10_64x2_c: 1890.8 vvc_avg_10_64x2_avx2: 34.3 vvc_avg_10_64x4_c: 6267.3 vvc_avg_10_64x4_avx2: 42.6 vvc_avg_10_64x8_c: 12778.1 vvc_avg_10_64x8_avx2: 67.8 vvc_avg_10_64x16_c: 22304.3 vvc_avg_10_64x16_avx2: 116.8 vvc_avg_10_64x32_c: 30777.1 vvc_avg_10_64x32_avx2: 201.1 vvc_avg_10_64x64_c: 60169.1 vvc_avg_10_64x64_avx2: 1454.3 vvc_avg_10_64x128_c: 124392.8 vvc_avg_10_64x128_avx2: 3648.6 vvc_avg_10_128x2_c: 3650.1 vvc_avg_10_128x2_avx2: 41.1 vvc_avg_10_128x4_c: 22887.8 vvc_avg_10_128x4_avx2: 64.1 vvc_avg_10_128x8_c: 14622.6 vvc_avg_10_128x8_avx2: 111.6 vvc_avg_10_128x16_c: 62207.6 vvc_avg_10_128x16_avx2: 186.3 vvc_avg_10_128x32_c: 59761.3 vvc_avg_10_128x32_avx2: 374.6 vvc_avg_10_128x64_c: 117504.3 vvc_avg_10_128x64_avx2: 2684.6 vvc_avg_10_128x128_c: 236767.6 vvc_avg_10_128x128_avx2: 15278.1 vvc_avg_12_2x2_c: 78.6 vvc_avg_12_2x2_avx2: 26.1 vvc_avg_12_2x4_c: 254.1 vvc_avg_12_2x4_avx2: 30.6 vvc_avg_12_2x8_c: 261.8 vvc_avg_12_2x8_avx2: 39.1 vvc_avg_12_2x16_c: 527.6 vvc_avg_12_2x16_avx2: 57.3 vvc_avg_12_2x32_c: 1089.1 vvc_avg_12_2x32_avx2: 93.8 vvc_avg_12_2x64_c: 2337.6 vvc_avg_12_2x64_avx2: 707.1 vvc_avg_12_2x128_c: 4582.1 vvc_avg_12_2x128_avx2: 1414.6 vvc_avg_12_4x2_c: 129.6 vvc_avg_12_4x2_avx2: 26.8 vvc_avg_12_4x4_c: 427.3 vvc_avg_12_4x4_avx2: 30.6 vvc_avg_12_4x8_c: 529.6 vvc_avg_12_4x8_avx2: 36.6 vvc_avg_12_4x16_c: 1022.1 
vvc_avg_12_4x16_avx2: 57.3 vvc_avg_12_4x32_c: 1987.6 vvc_avg_12_4x32_avx2: 84.3 vvc_avg_12_4x64_c: 4147.6 vvc_avg_12_4x64_avx2: 706.3 vvc_avg_12_4x128_c: 8469.3 vvc_avg_12_4x128_avx2: 1448.3 vvc_avg_12_8x2_c: 253.6 vvc_avg_12_8x2_avx2: 27.6 vvc_avg_12_8x4_c: 836.3 vvc_avg_12_8x4_avx2: 32.1 vvc_avg_12_8x8_c: 1074.6 vvc_avg_12_8x8_avx2: 45.1 vvc_avg_12_8x16_c: 3616.8 vvc_avg_12_8x16_avx2: 71.6 vvc_avg_12_8x32_c: 3823.6 vvc_avg_12_8x32_avx2: 140.1 vvc_avg_12_8x64_c: 7764.8 vvc_avg_12_8x64_avx2: 656.1 vvc_avg_12_8x128_c: 15896.1 vvc_avg_12_8x128_avx2: 1232.8 vvc_avg_12_16x2_c: 462.1 vvc_avg_12_16x2_avx2: 26.8 vvc_avg_12_16x4_c: 1732.1 vvc_avg_12_16x4_avx2: 29.1 vvc_avg_12_16x8_c: 2097.6 vvc_avg_12_16x8_avx2: 62.6 vvc_avg_12_16x16_c: 6753.1 vvc_avg_12_16x16_avx2: 47.8 vvc_avg_12_16x32_c: 7373.1 vvc_avg_12_16x32_avx2: 80.8 vvc_avg_12_16x64_c: 15046.3 vvc_avg_12_16x64_avx2: 621.1 vvc_avg_12_16x128_c: 52574.6 vvc_avg_12_16x128_avx2: 1417.1 vvc_avg_12_32x2_c: 1712.1 vvc_avg_12_32x2_avx2: 29.8 vvc_avg_12_32x4_c: 2036.8 vvc_avg_12_32x4_avx2: 37.6 vvc_avg_12_32x8_c: 4017.6 vvc_avg_12_32x8_avx2: 44.1 vvc_avg_12_32x16_c: 8018.6 vvc_avg_12_32x16_avx2: 70.8 vvc_avg_12_32x32_c: 15637.6 vvc_avg_12_32x32_avx2: 124.3 vvc_avg_12_32x64_c: 31143.3 vvc_avg_12_32x64_avx2: 830.3 vvc_avg_12_32x128_c: 75706.8 vvc_avg_12_32x128_avx2: 1604.8 vvc_avg_12_64x2_c: 3230.3 vvc_avg_12_64x2_avx2: 33.6 vvc_avg_12_64x4_c: 4139.6 vvc_avg_12_64x4_avx2: 45.1 vvc_avg_12_64x8_c: 8201.6 vvc_avg_12_64x8_avx2: 67.1 vvc_avg_12_64x16_c: 25632.3 vvc_avg_12_64x16_avx2: 110.3 vvc_avg_12_64x32_c: 30744.3 vvc_avg_12_64x32_avx2: 200.3 vvc_avg_12_64x64_c: 105554.8 vvc_avg_12_64x64_avx2: 1325.6 vvc_avg_12_64x128_c: 235254.3 vvc_avg_12_64x128_avx2: 3132.6 vvc_avg_12_128x2_c: 6194.3 vvc_avg_12_128x2_avx2: 55.1 vvc_avg_12_128x4_c: 7583.8 vvc_avg_12_128x4_avx2: 79.3 vvc_avg_12_128x8_c: 14635.6 vvc_avg_12_128x8_avx2: 104.3 vvc_avg_12_128x16_c: 29270.8 vvc_avg_12_128x16_avx2: 194.3 vvc_avg_12_128x32_c: 60113.6 
vvc_avg_12_128x32_avx2: 346.3 vvc_avg_12_128x64_c: 197030.3 vvc_avg_12_128x64_avx2: 2779.6 vvc_avg_12_128x128_c: 432809.6 vvc_avg_12_128x128_avx2: 5513.3 vvc_w_avg_8_2x2_c: 84.3 vvc_w_avg_8_2x2_avx2: 42.6 vvc_w_avg_8_2x4_c: 156.3 vvc_w_avg_8_2x4_avx2: 58.8 vvc_w_avg_8_2x8_c: 310.6 vvc_w_avg_8_2x8_avx2: 73.1 vvc_w_avg_8_2x16_c: 942.1 vvc_w_avg_8_2x16_avx2: 113.3 vvc_w_avg_8_2x32_c: 1098.8 vvc_w_avg_8_2x32_avx2: 202.6 vvc_w_avg_8_2x64_c: 2414.3 vvc_w_avg_8_2x64_avx2: 467.6 vvc_w_avg_8_2x128_c: 4763.8 vvc_w_avg_8_2x128_avx2: 1333.1 vvc_w_avg_8_4x2_c: 140.1 vvc_w_avg_8_4x2_avx2: 49.8 vvc_w_avg_8_4x4_c: 276.3 vvc_w_avg_8_4x4_avx2: 58.1 vvc_w_avg_8_4x8_c: 524.3 vvc_w_avg_8_4x8_avx2: 72.3 vvc_w_avg_8_4x16_c: 1108.1 vvc_w_avg_8_4x16_avx2: 111.8 vvc_w_avg_8_4x32_c: 2149.8 vvc_w_avg_8_4x32_avx2: 199.6 vvc_w_avg_8_4x64_c: 12288.1 vvc_w_avg_8_4x64_avx2: 509.3 vvc_w_avg_8_4x128_c: 8398.6 vvc_w_avg_8_4x128_avx2: 1319.6 vvc_w_avg_8_8x2_c: 271.1 vvc_w_avg_8_8x2_avx2: 44.1 vvc_w_avg_8_8x4_c: 503.3 vvc_w_avg_8_8x4_avx2: 61.8 vvc_w_avg_8_8x8_c: 1031.1 vvc_w_avg_8_8x8_avx2: 93.8 vvc_w_avg_8_8x16_c: 2009.8 vvc_w_avg_8_8x16_avx2: 163.1 vvc_w_avg_8_8x32_c: 4161.3 vvc_w_avg_8_8x32_avx2: 292.1 vvc_w_avg_8_8x64_c: 7940.6 vvc_w_avg_8_8x64_avx2: 592.1 vvc_w_avg_8_8x128_c: 16802.3 vvc_w_avg_8_8x128_avx2: 1287.6 vvc_w_avg_8_16x2_c: 762.6 vvc_w_avg_8_16x2_avx2: 53.6 vvc_w_avg_8_16x4_c: 1486.3 vvc_w_avg_8_16x4_avx2: 67.1 vvc_w_avg_8_16x8_c: 1907.8 vvc_w_avg_8_16x8_avx2: 96.8 vvc_w_avg_8_16x16_c: 3883.6 vvc_w_avg_8_16x16_avx2: 151.3 vvc_w_avg_8_16x32_c: 7974.8 vvc_w_avg_8_16x32_avx2: 285.8 vvc_w_avg_8_16x64_c: 25160.6 vvc_w_avg_8_16x64_avx2: 589.8 vvc_w_avg_8_16x128_c: 58328.1 vvc_w_avg_8_16x128_avx2: 1169.8 vvc_w_avg_8_32x2_c: 1009.1 vvc_w_avg_8_32x2_avx2: 65.6 vvc_w_avg_8_32x4_c: 2091.1 vvc_w_avg_8_32x4_avx2: 96.8 vvc_w_avg_8_32x8_c: 3997.8 vvc_w_avg_8_32x8_avx2: 156.3 vvc_w_avg_8_32x16_c: 8216.8 vvc_w_avg_8_32x16_avx2: 269.6 vvc_w_avg_8_32x32_c: 21746.1 vvc_w_avg_8_32x32_avx2: 635.3 
vvc_w_avg_8_32x64_c: 31564.8 vvc_w_avg_8_32x64_avx2: 1010.6 vvc_w_avg_8_32x128_c: 114373.3 vvc_w_avg_8_32x128_avx2: 2013.6 vvc_w_avg_8_64x2_c: 2067.3 vvc_w_avg_8_64x2_avx2: 97.6 vvc_w_avg_8_64x4_c: 3901.1 vvc_w_avg_8_64x4_avx2: 154.8 vvc_w_avg_8_64x8_c: 7911.6 vvc_w_avg_8_64x8_avx2: 268.8 vvc_w_avg_8_64x16_c: 16508.8 vvc_w_avg_8_64x16_avx2: 501.8 vvc_w_avg_8_64x32_c: 38770.3 vvc_w_avg_8_64x32_avx2: 1287.6 vvc_w_avg_8_64x64_c: 110350.6 vvc_w_avg_8_64x64_avx2: 1890.8 vvc_w_avg_8_64x128_c: 141354.6 vvc_w_avg_8_64x128_avx2: 3839.6 vvc_w_avg_8_128x2_c: 7012.1 vvc_w_avg_8_128x2_avx2: 159.3 vvc_w_avg_8_128x4_c: 8146.8 vvc_w_avg_8_128x4_avx2: 272.6 vvc_w_avg_8_128x8_c: 24596.8 vvc_w_avg_8_128x8_avx2: 501.1 vvc_w_avg_8_128x16_c: 35918.1 vvc_w_avg_8_128x16_avx2: 948.8 vvc_w_avg_8_128x32_c: 68799.6 vvc_w_avg_8_128x32_avx2: 1963.1 vvc_w_avg_8_128x64_c: 133862.1 vvc_w_avg_8_128x64_avx2: 3833.6 vvc_w_avg_8_128x128_c: 348427.8 vvc_w_avg_8_128x128_avx2: 7682.8 vvc_w_avg_10_2x2_c: 118.6 vvc_w_avg_10_2x2_avx2: 73.1 vvc_w_avg_10_2x4_c: 189.1 vvc_w_avg_10_2x4_avx2: 89.3 vvc_w_avg_10_2x8_c: 382.8 vvc_w_avg_10_2x8_avx2: 179.8 vvc_w_avg_10_2x16_c: 658.3 vvc_w_avg_10_2x16_avx2: 185.1 vvc_w_avg_10_2x32_c: 1409.3 vvc_w_avg_10_2x32_avx2: 290.8 vvc_w_avg_10_2x64_c: 2906.8 vvc_w_avg_10_2x64_avx2: 793.1 vvc_w_avg_10_2x128_c: 6292.6 vvc_w_avg_10_2x128_avx2: 1696.8 vvc_w_avg_10_4x2_c: 178.8 vvc_w_avg_10_4x2_avx2: 80.1 vvc_w_avg_10_4x4_c: 581.6 vvc_w_avg_10_4x4_avx2: 97.6 vvc_w_avg_10_4x8_c: 693.3 vvc_w_avg_10_4x8_avx2: 128.1 vvc_w_avg_10_4x16_c: 1436.6 vvc_w_avg_10_4x16_avx2: 179.8 vvc_w_avg_10_4x32_c: 2409.1 vvc_w_avg_10_4x32_avx2: 292.3 vvc_w_avg_10_4x64_c: 4925.3 vvc_w_avg_10_4x64_avx2: 746.1 vvc_w_avg_10_4x128_c: 10664.6 vvc_w_avg_10_4x128_avx2: 1647.6 vvc_w_avg_10_8x2_c: 359.3 vvc_w_avg_10_8x2_avx2: 80.1 vvc_w_avg_10_8x4_c: 925.6 vvc_w_avg_10_8x4_avx2: 97.6 vvc_w_avg_10_8x8_c: 1360.6 vvc_w_avg_10_8x8_avx2: 121.8 vvc_w_avg_10_8x16_c: 3490.3 vvc_w_avg_10_8x16_avx2: 203.3 vvc_w_avg_10_8x32_c: 
5266.1 vvc_w_avg_10_8x32_avx2: 325.8 vvc_w_avg_10_8x64_c: 11127.1 vvc_w_avg_10_8x64_avx2: 747.8 vvc_w_avg_10_8x128_c: 31058.3 vvc_w_avg_10_8x128_avx2: 1424.6 vvc_w_avg_10_16x2_c: 624.8 vvc_w_avg_10_16x2_avx2: 84.6 vvc_w_avg_10_16x4_c: 1389.6 vvc_w_avg_10_16x4_avx2: 109.1 vvc_w_avg_10_16x8_c: 2688.3 vvc_w_avg_10_16x8_avx2: 137.1 vvc_w_avg_10_16x16_c: 5387.1 vvc_w_avg_10_16x16_avx2: 224.6 vvc_w_avg_10_16x32_c: 10776.3 vvc_w_avg_10_16x32_avx2: 312.1 vvc_w_avg_10_16x64_c: 18069.1 vvc_w_avg_10_16x64_avx2: 858.6 vvc_w_avg_10_16x128_c: 43460.3 vvc_w_avg_10_16x128_avx2: 1411.6 vvc_w_avg_10_32x2_c: 1232.8 vvc_w_avg_10_32x2_avx2: 99.1 vvc_w_avg_10_32x4_c: 4017.6 vvc_w_avg_10_32x4_avx2: 134.1 vvc_w_avg_10_32x8_c: 9306.3 vvc_w_avg_10_32x8_avx2: 208.1 vvc_w_avg_10_32x16_c: 8424.6 vvc_w_avg_10_32x16_avx2: 349.3 vvc_w_avg_10_32x32_c: 20787.8 vvc_w_avg_10_32x32_avx2: 655.3 vvc_w_avg_10_32x64_c: 40972.1 vvc_w_avg_10_32x64_avx2: 904.8 vvc_w_avg_10_32x128_c: 85670.3 vvc_w_avg_10_32x128_avx2: 1751.6 vvc_w_avg_10_64x2_c: 2454.1 vvc_w_avg_10_64x2_avx2: 132.6 vvc_w_avg_10_64x4_c: 5012.6 vvc_w_avg_10_64x4_avx2: 215.6 vvc_w_avg_10_64x8_c: 10811.3 vvc_w_avg_10_64x8_avx2: 361.1 vvc_w_avg_10_64x16_c: 33349.1 vvc_w_avg_10_64x16_avx2: 904.1 vvc_w_avg_10_64x32_c: 41892.3 vvc_w_avg_10_64x32_avx2: 1220.6 vvc_w_avg_10_64x64_c: 66983.3 vvc_w_avg_10_64x64_avx2: 2622.1 vvc_w_avg_10_64x128_c: 246508.8 vvc_w_avg_10_64x128_avx2: 3316.8 vvc_w_avg_10_128x2_c: 7791.6 vvc_w_avg_10_128x2_avx2: 198.8 vvc_w_avg_10_128x4_c: 10534.3 vvc_w_avg_10_128x4_avx2: 337.3 vvc_w_avg_10_128x8_c: 21142.3 vvc_w_avg_10_128x8_avx2: 614.8 vvc_w_avg_10_128x16_c: 40968.6 vvc_w_avg_10_128x16_avx2: 1160.6 vvc_w_avg_10_128x32_c: 113043.3 vvc_w_avg_10_128x32_avx2: 1644.6 vvc_w_avg_10_128x64_c: 230658.3 vvc_w_avg_10_128x64_avx2: 5065.3 vvc_w_avg_10_128x128_c: 335236.3 vvc_w_avg_10_128x128_avx2: 6450.3 vvc_w_avg_12_2x2_c: 185.3 vvc_w_avg_12_2x2_avx2: 43.6 vvc_w_avg_12_2x4_c: 340.3 vvc_w_avg_12_2x4_avx2: 55.8 vvc_w_avg_12_2x8_c: 632.3 
vvc_w_avg_12_2x8_avx2: 70.1 vvc_w_avg_12_2x16_c: 728.3 vvc_w_avg_12_2x16_avx2: 108.1 vvc_w_avg_12_2x32_c: 1392.6 vvc_w_avg_12_2x32_avx2: 176.8 vvc_w_avg_12_2x64_c: 2618.3 vvc_w_avg_12_2x64_avx2: 757.3 vvc_w_avg_12_2x128_c: 6408.8 vvc_w_avg_12_2x128_avx2: 1435.1 vvc_w_avg_12_4x2_c: 349.3 vvc_w_avg_12_4x2_avx2: 44.3 vvc_w_avg_12_4x4_c: 607.1 vvc_w_avg_12_4x4_avx2: 52.6 vvc_w_avg_12_4x8_c: 1134.8 vvc_w_avg_12_4x8_avx2: 70.1 vvc_w_avg_12_4x16_c: 1378.1 vvc_w_avg_12_4x16_avx2: 115.3 vvc_w_avg_12_4x32_c: 2599.3 vvc_w_avg_12_4x32_avx2: 174.3 vvc_w_avg_12_4x64_c: 4474.8 vvc_w_avg_12_4x64_avx2: 656.1 vvc_w_avg_12_4x128_c: 11319.6 vvc_w_avg_12_4x128_avx2: 1373.1 vvc_w_avg_12_8x2_c: 595.8 vvc_w_avg_12_8x2_avx2: 44.3 vvc_w_avg_12_8x4_c: 1164.3 vvc_w_avg_12_8x4_avx2: 56.6 vvc_w_avg_12_8x8_c: 2019.6 vvc_w_avg_12_8x8_avx2: 80.1 vvc_w_avg_12_8x16_c: 4071.6 vvc_w_avg_12_8x16_avx2: 139.3 vvc_w_avg_12_8x32_c: 4485.1 vvc_w_avg_12_8x32_avx2: 250.6 vvc_w_avg_12_8x64_c: 8404.8 vvc_w_avg_12_8x64_avx2: 735.8 vvc_w_avg_12_8x128_c: 35679.8 vvc_w_avg_12_8x128_avx2: 1252.6 vvc_w_avg_12_16x2_c: 1114.8 vvc_w_avg_12_16x2_avx2: 46.6 vvc_w_avg_12_16x4_c: 2240.1 vvc_w_avg_12_16x4_avx2: 62.6 vvc_w_avg_12_16x8_c: 13174.6 vvc_w_avg_12_16x8_avx2: 88.6 vvc_w_avg_12_16x16_c: 5334.6 vvc_w_avg_12_16x16_avx2: 144.3 vvc_w_avg_12_16x32_c: 8378.1 vvc_w_avg_12_16x32_avx2: 234.6 vvc_w_avg_12_16x64_c: 21300.8 vvc_w_avg_12_16x64_avx2: 761.8 vvc_w_avg_12_16x128_c: 32786.8 vvc_w_avg_12_16x128_avx2: 1432.8 vvc_w_avg_12_32x2_c: 2154.3 vvc_w_avg_12_32x2_avx2: 61.1 vvc_w_avg_12_32x4_c: 4299.8 vvc_w_avg_12_32x4_avx2: 83.1 vvc_w_avg_12_32x8_c: 7964.8 vvc_w_avg_12_32x8_avx2: 132.6 vvc_w_avg_12_32x16_c: 13321.6 vvc_w_avg_12_32x16_avx2: 234.6 vvc_w_avg_12_32x32_c: 21149.3 vvc_w_avg_12_32x32_avx2: 433.3 vvc_w_avg_12_32x64_c: 43666.6 vvc_w_avg_12_32x64_avx2: 876.6 vvc_w_avg_12_32x128_c: 83189.8 vvc_w_avg_12_32x128_avx2: 1756.6 vvc_w_avg_12_64x2_c: 3829.8 vvc_w_avg_12_64x2_avx2: 83.1 vvc_w_avg_12_64x4_c: 8588.1 
vvc_w_avg_12_64x4_avx2: 127.1 vvc_w_avg_12_64x8_c: 17027.6 vvc_w_avg_12_64x8_avx2: 310.6 vvc_w_avg_12_64x16_c: 29797.8 vvc_w_avg_12_64x16_avx2: 415.6 vvc_w_avg_12_64x32_c: 43854.3 vvc_w_avg_12_64x32_avx2: 773.3 vvc_w_avg_12_64x64_c: 137767.3 vvc_w_avg_12_64x64_avx2: 1608.6 vvc_w_avg_12_64x128_c: 316428.3 vvc_w_avg_12_64x128_avx2: 3249.8 vvc_w_avg_12_128x2_c: 8824.6 vvc_w_avg_12_128x2_avx2: 130.3 vvc_w_avg_12_128x4_c: 17173.6 vvc_w_avg_12_128x4_avx2: 219.3 vvc_w_avg_12_128x8_c: 21997.8 vvc_w_avg_12_128x8_avx2: 397.3 vvc_w_avg_12_128x16_c: 43553.8 vvc_w_avg_12_128x16_avx2: 790.1 vvc_w_avg_12_128x32_c: 89792.1 vvc_w_avg_12_128x32_avx2: 1497.6 vvc_w_avg_12_128x64_c: 226573.3 vvc_w_avg_12_128x64_avx2: 3153.1 vvc_w_avg_12_128x128_c: 332090.1 vvc_w_avg_12_128x128_avx2: 6499.6 Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
2024-01-23 18:17:10 +00:00
/* bf(avg, 10, avx2)         ->  avg_10_avx2           */
#define bf(stem, bits, isa)  stem ## _ ## bits ## _ ## isa
/* BF(ff_vvc_avg, 16, avx2)  ->  ff_vvc_avg_16bpc_avx2 */
#define BF(stem, nbits, isa) stem ## _ ## nbits ## bpc_ ## isa
/*
 * Declare (prototype only) the two functions named by
 * BF(ff_vvc_avg, bpc, isa) and BF(ff_vvc_w_avg, bpc, isa).
 * All scalar parameters, including the trailing pixel_max, are intptr_t.
 */
#define AVG_BPC_FUNC(bpc, isa)                                                 \
void BF(ff_vvc_avg, bpc, isa)(uint8_t *dst, ptrdiff_t dst_stride,              \
                              const int16_t *src0, const int16_t *src1,        \
                              intptr_t width, intptr_t height,                 \
                              intptr_t pixel_max);                             \
void BF(ff_vvc_w_avg, bpc, isa)(uint8_t *dst, ptrdiff_t dst_stride,            \
                                const int16_t *src0, const int16_t *src1,      \
                                intptr_t width, intptr_t height,               \
                                intptr_t denom, intptr_t w0, intptr_t w1,      \
                                intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * Define two static int-parameter wrappers, bf(avg, bits, isa) and
 * bf(w_avg, bits, isa).  Each forwards its arguments to the matching
 * BF(ff_vvc_avg / ff_vvc_w_avg, kbpc, isa) function and appends
 * (1 << bits) - 1 as the final pixel_max argument.
 */
#define AVG_FUNCS(kbpc, bits, isa)                                             \
static void bf(avg, bits, isa)(uint8_t *dst, ptrdiff_t dst_stride,             \
                               const int16_t *src0, const int16_t *src1,       \
                               int width, int height)                          \
{                                                                              \
    BF(ff_vvc_avg, kbpc, isa)(dst, dst_stride, src0, src1,                     \
                              width, height, (1 << bits) - 1);                 \
}                                                                              \
static void bf(w_avg, bits, isa)(uint8_t *dst, ptrdiff_t dst_stride,           \
                                 const int16_t *src0, const int16_t *src1,     \
                                 int width, int height,                        \
                                 int denom, int w0, int w1, int o0, int o1)    \
{                                                                              \
    BF(ff_vvc_w_avg, kbpc, isa)(dst, dst_stride, src0, src1,                   \
                                width, height, denom, w0, w1, o0, o1,          \
                                (1 << bits) - 1);                              \
}

/* Prototypes for the 8bpc and 16bpc avx2 functions. */
AVG_BPC_FUNC(8,  avx2)
AVG_BPC_FUNC(16, avx2)

/* Bit depth 8 uses the 8bpc functions; 10 and 12 both use the 16bpc ones. */
AVG_FUNCS(8,  8,  avx2)
AVG_FUNCS(16, 10, avx2)
AVG_FUNCS(16, 12, avx2)
/*
 * Assign bf(avg, bits, isa) to c->inter.avg and bf(w_avg, bits, isa) to
 * c->inter.w_avg as a single statement.  Expects `c` to be in scope.
 */
#define AVG_INIT(bits, isa)                        \
    do {                                           \
        c->inter.avg   = bf(avg,   bits, isa);     \
        c->inter.w_avg = bf(w_avg, bits, isa);     \
    } while (0)
#endif
avcodec/x86/vvc: add avg and avg_w AVX2 optimizations The avg/avg_w is based on dav1d. See https://code.videolan.org/videolan/dav1d/-/blob/master/src/x86/mc_avx2.asm vvc_avg_8_2x2_c: 71.6 vvc_avg_8_2x2_avx2: 26.8 vvc_avg_8_2x4_c: 140.8 vvc_avg_8_2x4_avx2: 34.6 vvc_avg_8_2x8_c: 410.3 vvc_avg_8_2x8_avx2: 41.3 vvc_avg_8_2x16_c: 769.3 vvc_avg_8_2x16_avx2: 60.3 vvc_avg_8_2x32_c: 1669.6 vvc_avg_8_2x32_avx2: 105.1 vvc_avg_8_2x64_c: 1978.3 vvc_avg_8_2x64_avx2: 425.8 vvc_avg_8_2x128_c: 6536.8 vvc_avg_8_2x128_avx2: 1315.1 vvc_avg_8_4x2_c: 155.6 vvc_avg_8_4x2_avx2: 26.1 vvc_avg_8_4x4_c: 250.3 vvc_avg_8_4x4_avx2: 31.3 vvc_avg_8_4x8_c: 831.8 vvc_avg_8_4x8_avx2: 41.3 vvc_avg_8_4x16_c: 1461.1 vvc_avg_8_4x16_avx2: 57.1 vvc_avg_8_4x32_c: 2821.6 vvc_avg_8_4x32_avx2: 105.1 vvc_avg_8_4x64_c: 3615.8 vvc_avg_8_4x64_avx2: 412.6 vvc_avg_8_4x128_c: 11962.6 vvc_avg_8_4x128_avx2: 1274.3 vvc_avg_8_8x2_c: 215.8 vvc_avg_8_8x2_avx2: 29.1 vvc_avg_8_8x4_c: 430.6 vvc_avg_8_8x4_avx2: 37.6 vvc_avg_8_8x8_c: 1463.3 vvc_avg_8_8x8_avx2: 51.8 vvc_avg_8_8x16_c: 2630.1 vvc_avg_8_8x16_avx2: 97.6 vvc_avg_8_8x32_c: 5813.8 vvc_avg_8_8x32_avx2: 196.6 vvc_avg_8_8x64_c: 6687.3 vvc_avg_8_8x64_avx2: 487.8 vvc_avg_8_8x128_c: 13178.6 vvc_avg_8_8x128_avx2: 1290.6 vvc_avg_8_16x2_c: 443.8 vvc_avg_8_16x2_avx2: 28.3 vvc_avg_8_16x4_c: 1253.3 vvc_avg_8_16x4_avx2: 32.1 vvc_avg_8_16x8_c: 2236.3 vvc_avg_8_16x8_avx2: 44.3 vvc_avg_8_16x16_c: 5127.8 vvc_avg_8_16x16_avx2: 63.3 vvc_avg_8_16x32_c: 6573.3 vvc_avg_8_16x32_avx2: 223.6 vvc_avg_8_16x64_c: 30311.8 vvc_avg_8_16x64_avx2: 437.8 vvc_avg_8_16x128_c: 25693.3 vvc_avg_8_16x128_avx2: 1266.8 vvc_avg_8_32x2_c: 954.6 vvc_avg_8_32x2_avx2: 32.1 vvc_avg_8_32x4_c: 2359.6 vvc_avg_8_32x4_avx2: 39.6 vvc_avg_8_32x8_c: 5703.6 vvc_avg_8_32x8_avx2: 57.1 vvc_avg_8_32x16_c: 9967.6 vvc_avg_8_32x16_avx2: 107.1 vvc_avg_8_32x32_c: 21327.6 vvc_avg_8_32x32_avx2: 272.6 vvc_avg_8_32x64_c: 39240.8 vvc_avg_8_32x64_avx2: 529.6 vvc_avg_8_32x128_c: 52580.8 vvc_avg_8_32x128_avx2: 1338.8 vvc_avg_8_64x2_c: 1647.3 
vvc_avg_8_64x2_avx2: 38.8 vvc_avg_8_64x4_c: 5130.1 vvc_avg_8_64x4_avx2: 58.8 vvc_avg_8_64x8_c: 6529.3 vvc_avg_8_64x8_avx2: 88.3 vvc_avg_8_64x16_c: 19913.6 vvc_avg_8_64x16_avx2: 162.3 vvc_avg_8_64x32_c: 39360.8 vvc_avg_8_64x32_avx2: 295.8 vvc_avg_8_64x64_c: 49658.3 vvc_avg_8_64x64_avx2: 784.1 vvc_avg_8_64x128_c: 108513.1 vvc_avg_8_64x128_avx2: 1977.1 vvc_avg_8_128x2_c: 3226.1 vvc_avg_8_128x2_avx2: 61.1 vvc_avg_8_128x4_c: 10280.3 vvc_avg_8_128x4_avx2: 94.6 vvc_avg_8_128x8_c: 18079.3 vvc_avg_8_128x8_avx2: 155.3 vvc_avg_8_128x16_c: 45121.8 vvc_avg_8_128x16_avx2: 285.3 vvc_avg_8_128x32_c: 48651.8 vvc_avg_8_128x32_avx2: 581.6 vvc_avg_8_128x64_c: 165078.6 vvc_avg_8_128x64_avx2: 1942.8 vvc_avg_8_128x128_c: 339103.1 vvc_avg_8_128x128_avx2: 4332.6 vvc_avg_10_2x2_c: 144.3 vvc_avg_10_2x2_avx2: 26.8 vvc_avg_10_2x4_c: 142.6 vvc_avg_10_2x4_avx2: 45.3 vvc_avg_10_2x8_c: 478.1 vvc_avg_10_2x8_avx2: 38.1 vvc_avg_10_2x16_c: 518.3 vvc_avg_10_2x16_avx2: 58.1 vvc_avg_10_2x32_c: 2059.8 vvc_avg_10_2x32_avx2: 93.1 vvc_avg_10_2x64_c: 2383.8 vvc_avg_10_2x64_avx2: 714.8 vvc_avg_10_2x128_c: 4498.3 vvc_avg_10_2x128_avx2: 1466.3 vvc_avg_10_4x2_c: 228.6 vvc_avg_10_4x2_avx2: 26.8 vvc_avg_10_4x4_c: 378.3 vvc_avg_10_4x4_avx2: 30.6 vvc_avg_10_4x8_c: 866.8 vvc_avg_10_4x8_avx2: 44.6 vvc_avg_10_4x16_c: 1018.1 vvc_avg_10_4x16_avx2: 58.1 vvc_avg_10_4x32_c: 3590.8 vvc_avg_10_4x32_avx2: 128.8 vvc_avg_10_4x64_c: 4200.8 vvc_avg_10_4x64_avx2: 663.6 vvc_avg_10_4x128_c: 8450.8 vvc_avg_10_4x128_avx2: 1531.8 vvc_avg_10_8x2_c: 369.3 vvc_avg_10_8x2_avx2: 28.3 vvc_avg_10_8x4_c: 513.8 vvc_avg_10_8x4_avx2: 32.1 vvc_avg_10_8x8_c: 1720.3 vvc_avg_10_8x8_avx2: 49.1 vvc_avg_10_8x16_c: 1894.8 vvc_avg_10_8x16_avx2: 71.6 vvc_avg_10_8x32_c: 3931.3 vvc_avg_10_8x32_avx2: 148.1 vvc_avg_10_8x64_c: 7964.3 vvc_avg_10_8x64_avx2: 613.1 vvc_avg_10_8x128_c: 15540.1 vvc_avg_10_8x128_avx2: 1585.1 vvc_avg_10_16x2_c: 877.3 vvc_avg_10_16x2_avx2: 27.6 vvc_avg_10_16x4_c: 955.8 vvc_avg_10_16x4_avx2: 29.8 vvc_avg_10_16x8_c: 3419.6 
vvc_avg_10_16x8_avx2: 62.6 vvc_avg_10_16x16_c: 3826.8 vvc_avg_10_16x16_avx2: 54.3 vvc_avg_10_16x32_c: 7655.3 vvc_avg_10_16x32_avx2: 86.3 vvc_avg_10_16x64_c: 30011.1 vvc_avg_10_16x64_avx2: 692.6 vvc_avg_10_16x128_c: 47894.8 vvc_avg_10_16x128_avx2: 1580.3 vvc_avg_10_32x2_c: 944.3 vvc_avg_10_32x2_avx2: 29.8 vvc_avg_10_32x4_c: 2022.6 vvc_avg_10_32x4_avx2: 35.1 vvc_avg_10_32x8_c: 6148.8 vvc_avg_10_32x8_avx2: 51.3 vvc_avg_10_32x16_c: 12601.6 vvc_avg_10_32x16_avx2: 70.8 vvc_avg_10_32x32_c: 15958.6 vvc_avg_10_32x32_avx2: 124.3 vvc_avg_10_32x64_c: 31784.6 vvc_avg_10_32x64_avx2: 757.3 vvc_avg_10_32x128_c: 63892.8 vvc_avg_10_32x128_avx2: 1711.3 vvc_avg_10_64x2_c: 1890.8 vvc_avg_10_64x2_avx2: 34.3 vvc_avg_10_64x4_c: 6267.3 vvc_avg_10_64x4_avx2: 42.6 vvc_avg_10_64x8_c: 12778.1 vvc_avg_10_64x8_avx2: 67.8 vvc_avg_10_64x16_c: 22304.3 vvc_avg_10_64x16_avx2: 116.8 vvc_avg_10_64x32_c: 30777.1 vvc_avg_10_64x32_avx2: 201.1 vvc_avg_10_64x64_c: 60169.1 vvc_avg_10_64x64_avx2: 1454.3 vvc_avg_10_64x128_c: 124392.8 vvc_avg_10_64x128_avx2: 3648.6 vvc_avg_10_128x2_c: 3650.1 vvc_avg_10_128x2_avx2: 41.1 vvc_avg_10_128x4_c: 22887.8 vvc_avg_10_128x4_avx2: 64.1 vvc_avg_10_128x8_c: 14622.6 vvc_avg_10_128x8_avx2: 111.6 vvc_avg_10_128x16_c: 62207.6 vvc_avg_10_128x16_avx2: 186.3 vvc_avg_10_128x32_c: 59761.3 vvc_avg_10_128x32_avx2: 374.6 vvc_avg_10_128x64_c: 117504.3 vvc_avg_10_128x64_avx2: 2684.6 vvc_avg_10_128x128_c: 236767.6 vvc_avg_10_128x128_avx2: 15278.1 vvc_avg_12_2x2_c: 78.6 vvc_avg_12_2x2_avx2: 26.1 vvc_avg_12_2x4_c: 254.1 vvc_avg_12_2x4_avx2: 30.6 vvc_avg_12_2x8_c: 261.8 vvc_avg_12_2x8_avx2: 39.1 vvc_avg_12_2x16_c: 527.6 vvc_avg_12_2x16_avx2: 57.3 vvc_avg_12_2x32_c: 1089.1 vvc_avg_12_2x32_avx2: 93.8 vvc_avg_12_2x64_c: 2337.6 vvc_avg_12_2x64_avx2: 707.1 vvc_avg_12_2x128_c: 4582.1 vvc_avg_12_2x128_avx2: 1414.6 vvc_avg_12_4x2_c: 129.6 vvc_avg_12_4x2_avx2: 26.8 vvc_avg_12_4x4_c: 427.3 vvc_avg_12_4x4_avx2: 30.6 vvc_avg_12_4x8_c: 529.6 vvc_avg_12_4x8_avx2: 36.6 vvc_avg_12_4x16_c: 1022.1 
vvc_avg_12_4x16_avx2: 57.3 vvc_avg_12_4x32_c: 1987.6 vvc_avg_12_4x32_avx2: 84.3 vvc_avg_12_4x64_c: 4147.6 vvc_avg_12_4x64_avx2: 706.3 vvc_avg_12_4x128_c: 8469.3 vvc_avg_12_4x128_avx2: 1448.3 vvc_avg_12_8x2_c: 253.6 vvc_avg_12_8x2_avx2: 27.6 vvc_avg_12_8x4_c: 836.3 vvc_avg_12_8x4_avx2: 32.1 vvc_avg_12_8x8_c: 1074.6 vvc_avg_12_8x8_avx2: 45.1 vvc_avg_12_8x16_c: 3616.8 vvc_avg_12_8x16_avx2: 71.6 vvc_avg_12_8x32_c: 3823.6 vvc_avg_12_8x32_avx2: 140.1 vvc_avg_12_8x64_c: 7764.8 vvc_avg_12_8x64_avx2: 656.1 vvc_avg_12_8x128_c: 15896.1 vvc_avg_12_8x128_avx2: 1232.8 vvc_avg_12_16x2_c: 462.1 vvc_avg_12_16x2_avx2: 26.8 vvc_avg_12_16x4_c: 1732.1 vvc_avg_12_16x4_avx2: 29.1 vvc_avg_12_16x8_c: 2097.6 vvc_avg_12_16x8_avx2: 62.6 vvc_avg_12_16x16_c: 6753.1 vvc_avg_12_16x16_avx2: 47.8 vvc_avg_12_16x32_c: 7373.1 vvc_avg_12_16x32_avx2: 80.8 vvc_avg_12_16x64_c: 15046.3 vvc_avg_12_16x64_avx2: 621.1 vvc_avg_12_16x128_c: 52574.6 vvc_avg_12_16x128_avx2: 1417.1 vvc_avg_12_32x2_c: 1712.1 vvc_avg_12_32x2_avx2: 29.8 vvc_avg_12_32x4_c: 2036.8 vvc_avg_12_32x4_avx2: 37.6 vvc_avg_12_32x8_c: 4017.6 vvc_avg_12_32x8_avx2: 44.1 vvc_avg_12_32x16_c: 8018.6 vvc_avg_12_32x16_avx2: 70.8 vvc_avg_12_32x32_c: 15637.6 vvc_avg_12_32x32_avx2: 124.3 vvc_avg_12_32x64_c: 31143.3 vvc_avg_12_32x64_avx2: 830.3 vvc_avg_12_32x128_c: 75706.8 vvc_avg_12_32x128_avx2: 1604.8 vvc_avg_12_64x2_c: 3230.3 vvc_avg_12_64x2_avx2: 33.6 vvc_avg_12_64x4_c: 4139.6 vvc_avg_12_64x4_avx2: 45.1 vvc_avg_12_64x8_c: 8201.6 vvc_avg_12_64x8_avx2: 67.1 vvc_avg_12_64x16_c: 25632.3 vvc_avg_12_64x16_avx2: 110.3 vvc_avg_12_64x32_c: 30744.3 vvc_avg_12_64x32_avx2: 200.3 vvc_avg_12_64x64_c: 105554.8 vvc_avg_12_64x64_avx2: 1325.6 vvc_avg_12_64x128_c: 235254.3 vvc_avg_12_64x128_avx2: 3132.6 vvc_avg_12_128x2_c: 6194.3 vvc_avg_12_128x2_avx2: 55.1 vvc_avg_12_128x4_c: 7583.8 vvc_avg_12_128x4_avx2: 79.3 vvc_avg_12_128x8_c: 14635.6 vvc_avg_12_128x8_avx2: 104.3 vvc_avg_12_128x16_c: 29270.8 vvc_avg_12_128x16_avx2: 194.3 vvc_avg_12_128x32_c: 60113.6 
vvc_avg_12_128x32_avx2: 346.3 vvc_avg_12_128x64_c: 197030.3 vvc_avg_12_128x64_avx2: 2779.6 vvc_avg_12_128x128_c: 432809.6 vvc_avg_12_128x128_avx2: 5513.3 vvc_w_avg_8_2x2_c: 84.3 vvc_w_avg_8_2x2_avx2: 42.6 vvc_w_avg_8_2x4_c: 156.3 vvc_w_avg_8_2x4_avx2: 58.8 vvc_w_avg_8_2x8_c: 310.6 vvc_w_avg_8_2x8_avx2: 73.1 vvc_w_avg_8_2x16_c: 942.1 vvc_w_avg_8_2x16_avx2: 113.3 vvc_w_avg_8_2x32_c: 1098.8 vvc_w_avg_8_2x32_avx2: 202.6 vvc_w_avg_8_2x64_c: 2414.3 vvc_w_avg_8_2x64_avx2: 467.6 vvc_w_avg_8_2x128_c: 4763.8 vvc_w_avg_8_2x128_avx2: 1333.1 vvc_w_avg_8_4x2_c: 140.1 vvc_w_avg_8_4x2_avx2: 49.8 vvc_w_avg_8_4x4_c: 276.3 vvc_w_avg_8_4x4_avx2: 58.1 vvc_w_avg_8_4x8_c: 524.3 vvc_w_avg_8_4x8_avx2: 72.3 vvc_w_avg_8_4x16_c: 1108.1 vvc_w_avg_8_4x16_avx2: 111.8 vvc_w_avg_8_4x32_c: 2149.8 vvc_w_avg_8_4x32_avx2: 199.6 vvc_w_avg_8_4x64_c: 12288.1 vvc_w_avg_8_4x64_avx2: 509.3 vvc_w_avg_8_4x128_c: 8398.6 vvc_w_avg_8_4x128_avx2: 1319.6 vvc_w_avg_8_8x2_c: 271.1 vvc_w_avg_8_8x2_avx2: 44.1 vvc_w_avg_8_8x4_c: 503.3 vvc_w_avg_8_8x4_avx2: 61.8 vvc_w_avg_8_8x8_c: 1031.1 vvc_w_avg_8_8x8_avx2: 93.8 vvc_w_avg_8_8x16_c: 2009.8 vvc_w_avg_8_8x16_avx2: 163.1 vvc_w_avg_8_8x32_c: 4161.3 vvc_w_avg_8_8x32_avx2: 292.1 vvc_w_avg_8_8x64_c: 7940.6 vvc_w_avg_8_8x64_avx2: 592.1 vvc_w_avg_8_8x128_c: 16802.3 vvc_w_avg_8_8x128_avx2: 1287.6 vvc_w_avg_8_16x2_c: 762.6 vvc_w_avg_8_16x2_avx2: 53.6 vvc_w_avg_8_16x4_c: 1486.3 vvc_w_avg_8_16x4_avx2: 67.1 vvc_w_avg_8_16x8_c: 1907.8 vvc_w_avg_8_16x8_avx2: 96.8 vvc_w_avg_8_16x16_c: 3883.6 vvc_w_avg_8_16x16_avx2: 151.3 vvc_w_avg_8_16x32_c: 7974.8 vvc_w_avg_8_16x32_avx2: 285.8 vvc_w_avg_8_16x64_c: 25160.6 vvc_w_avg_8_16x64_avx2: 589.8 vvc_w_avg_8_16x128_c: 58328.1 vvc_w_avg_8_16x128_avx2: 1169.8 vvc_w_avg_8_32x2_c: 1009.1 vvc_w_avg_8_32x2_avx2: 65.6 vvc_w_avg_8_32x4_c: 2091.1 vvc_w_avg_8_32x4_avx2: 96.8 vvc_w_avg_8_32x8_c: 3997.8 vvc_w_avg_8_32x8_avx2: 156.3 vvc_w_avg_8_32x16_c: 8216.8 vvc_w_avg_8_32x16_avx2: 269.6 vvc_w_avg_8_32x32_c: 21746.1 vvc_w_avg_8_32x32_avx2: 635.3 
vvc_w_avg_8_32x64_c: 31564.8 vvc_w_avg_8_32x64_avx2: 1010.6 vvc_w_avg_8_32x128_c: 114373.3 vvc_w_avg_8_32x128_avx2: 2013.6 vvc_w_avg_8_64x2_c: 2067.3 vvc_w_avg_8_64x2_avx2: 97.6 vvc_w_avg_8_64x4_c: 3901.1 vvc_w_avg_8_64x4_avx2: 154.8 vvc_w_avg_8_64x8_c: 7911.6 vvc_w_avg_8_64x8_avx2: 268.8 vvc_w_avg_8_64x16_c: 16508.8 vvc_w_avg_8_64x16_avx2: 501.8 vvc_w_avg_8_64x32_c: 38770.3 vvc_w_avg_8_64x32_avx2: 1287.6 vvc_w_avg_8_64x64_c: 110350.6 vvc_w_avg_8_64x64_avx2: 1890.8 vvc_w_avg_8_64x128_c: 141354.6 vvc_w_avg_8_64x128_avx2: 3839.6 vvc_w_avg_8_128x2_c: 7012.1 vvc_w_avg_8_128x2_avx2: 159.3 vvc_w_avg_8_128x4_c: 8146.8 vvc_w_avg_8_128x4_avx2: 272.6 vvc_w_avg_8_128x8_c: 24596.8 vvc_w_avg_8_128x8_avx2: 501.1 vvc_w_avg_8_128x16_c: 35918.1 vvc_w_avg_8_128x16_avx2: 948.8 vvc_w_avg_8_128x32_c: 68799.6 vvc_w_avg_8_128x32_avx2: 1963.1 vvc_w_avg_8_128x64_c: 133862.1 vvc_w_avg_8_128x64_avx2: 3833.6 vvc_w_avg_8_128x128_c: 348427.8 vvc_w_avg_8_128x128_avx2: 7682.8 vvc_w_avg_10_2x2_c: 118.6 vvc_w_avg_10_2x2_avx2: 73.1 vvc_w_avg_10_2x4_c: 189.1 vvc_w_avg_10_2x4_avx2: 89.3 vvc_w_avg_10_2x8_c: 382.8 vvc_w_avg_10_2x8_avx2: 179.8 vvc_w_avg_10_2x16_c: 658.3 vvc_w_avg_10_2x16_avx2: 185.1 vvc_w_avg_10_2x32_c: 1409.3 vvc_w_avg_10_2x32_avx2: 290.8 vvc_w_avg_10_2x64_c: 2906.8 vvc_w_avg_10_2x64_avx2: 793.1 vvc_w_avg_10_2x128_c: 6292.6 vvc_w_avg_10_2x128_avx2: 1696.8 vvc_w_avg_10_4x2_c: 178.8 vvc_w_avg_10_4x2_avx2: 80.1 vvc_w_avg_10_4x4_c: 581.6 vvc_w_avg_10_4x4_avx2: 97.6 vvc_w_avg_10_4x8_c: 693.3 vvc_w_avg_10_4x8_avx2: 128.1 vvc_w_avg_10_4x16_c: 1436.6 vvc_w_avg_10_4x16_avx2: 179.8 vvc_w_avg_10_4x32_c: 2409.1 vvc_w_avg_10_4x32_avx2: 292.3 vvc_w_avg_10_4x64_c: 4925.3 vvc_w_avg_10_4x64_avx2: 746.1 vvc_w_avg_10_4x128_c: 10664.6 vvc_w_avg_10_4x128_avx2: 1647.6 vvc_w_avg_10_8x2_c: 359.3 vvc_w_avg_10_8x2_avx2: 80.1 vvc_w_avg_10_8x4_c: 925.6 vvc_w_avg_10_8x4_avx2: 97.6 vvc_w_avg_10_8x8_c: 1360.6 vvc_w_avg_10_8x8_avx2: 121.8 vvc_w_avg_10_8x16_c: 3490.3 vvc_w_avg_10_8x16_avx2: 203.3 vvc_w_avg_10_8x32_c: 
5266.1 vvc_w_avg_10_8x32_avx2: 325.8 vvc_w_avg_10_8x64_c: 11127.1 vvc_w_avg_10_8x64_avx2: 747.8 vvc_w_avg_10_8x128_c: 31058.3 vvc_w_avg_10_8x128_avx2: 1424.6 vvc_w_avg_10_16x2_c: 624.8 vvc_w_avg_10_16x2_avx2: 84.6 vvc_w_avg_10_16x4_c: 1389.6 vvc_w_avg_10_16x4_avx2: 109.1 vvc_w_avg_10_16x8_c: 2688.3 vvc_w_avg_10_16x8_avx2: 137.1 vvc_w_avg_10_16x16_c: 5387.1 vvc_w_avg_10_16x16_avx2: 224.6 vvc_w_avg_10_16x32_c: 10776.3 vvc_w_avg_10_16x32_avx2: 312.1 vvc_w_avg_10_16x64_c: 18069.1 vvc_w_avg_10_16x64_avx2: 858.6 vvc_w_avg_10_16x128_c: 43460.3 vvc_w_avg_10_16x128_avx2: 1411.6 vvc_w_avg_10_32x2_c: 1232.8 vvc_w_avg_10_32x2_avx2: 99.1 vvc_w_avg_10_32x4_c: 4017.6 vvc_w_avg_10_32x4_avx2: 134.1 vvc_w_avg_10_32x8_c: 9306.3 vvc_w_avg_10_32x8_avx2: 208.1 vvc_w_avg_10_32x16_c: 8424.6 vvc_w_avg_10_32x16_avx2: 349.3 vvc_w_avg_10_32x32_c: 20787.8 vvc_w_avg_10_32x32_avx2: 655.3 vvc_w_avg_10_32x64_c: 40972.1 vvc_w_avg_10_32x64_avx2: 904.8 vvc_w_avg_10_32x128_c: 85670.3 vvc_w_avg_10_32x128_avx2: 1751.6 vvc_w_avg_10_64x2_c: 2454.1 vvc_w_avg_10_64x2_avx2: 132.6 vvc_w_avg_10_64x4_c: 5012.6 vvc_w_avg_10_64x4_avx2: 215.6 vvc_w_avg_10_64x8_c: 10811.3 vvc_w_avg_10_64x8_avx2: 361.1 vvc_w_avg_10_64x16_c: 33349.1 vvc_w_avg_10_64x16_avx2: 904.1 vvc_w_avg_10_64x32_c: 41892.3 vvc_w_avg_10_64x32_avx2: 1220.6 vvc_w_avg_10_64x64_c: 66983.3 vvc_w_avg_10_64x64_avx2: 2622.1 vvc_w_avg_10_64x128_c: 246508.8 vvc_w_avg_10_64x128_avx2: 3316.8 vvc_w_avg_10_128x2_c: 7791.6 vvc_w_avg_10_128x2_avx2: 198.8 vvc_w_avg_10_128x4_c: 10534.3 vvc_w_avg_10_128x4_avx2: 337.3 vvc_w_avg_10_128x8_c: 21142.3 vvc_w_avg_10_128x8_avx2: 614.8 vvc_w_avg_10_128x16_c: 40968.6 vvc_w_avg_10_128x16_avx2: 1160.6 vvc_w_avg_10_128x32_c: 113043.3 vvc_w_avg_10_128x32_avx2: 1644.6 vvc_w_avg_10_128x64_c: 230658.3 vvc_w_avg_10_128x64_avx2: 5065.3 vvc_w_avg_10_128x128_c: 335236.3 vvc_w_avg_10_128x128_avx2: 6450.3 vvc_w_avg_12_2x2_c: 185.3 vvc_w_avg_12_2x2_avx2: 43.6 vvc_w_avg_12_2x4_c: 340.3 vvc_w_avg_12_2x4_avx2: 55.8 vvc_w_avg_12_2x8_c: 632.3 
vvc_w_avg_12_2x8_avx2: 70.1 vvc_w_avg_12_2x16_c: 728.3 vvc_w_avg_12_2x16_avx2: 108.1 vvc_w_avg_12_2x32_c: 1392.6 vvc_w_avg_12_2x32_avx2: 176.8 vvc_w_avg_12_2x64_c: 2618.3 vvc_w_avg_12_2x64_avx2: 757.3 vvc_w_avg_12_2x128_c: 6408.8 vvc_w_avg_12_2x128_avx2: 1435.1 vvc_w_avg_12_4x2_c: 349.3 vvc_w_avg_12_4x2_avx2: 44.3 vvc_w_avg_12_4x4_c: 607.1 vvc_w_avg_12_4x4_avx2: 52.6 vvc_w_avg_12_4x8_c: 1134.8 vvc_w_avg_12_4x8_avx2: 70.1 vvc_w_avg_12_4x16_c: 1378.1 vvc_w_avg_12_4x16_avx2: 115.3 vvc_w_avg_12_4x32_c: 2599.3 vvc_w_avg_12_4x32_avx2: 174.3 vvc_w_avg_12_4x64_c: 4474.8 vvc_w_avg_12_4x64_avx2: 656.1 vvc_w_avg_12_4x128_c: 11319.6 vvc_w_avg_12_4x128_avx2: 1373.1 vvc_w_avg_12_8x2_c: 595.8 vvc_w_avg_12_8x2_avx2: 44.3 vvc_w_avg_12_8x4_c: 1164.3 vvc_w_avg_12_8x4_avx2: 56.6 vvc_w_avg_12_8x8_c: 2019.6 vvc_w_avg_12_8x8_avx2: 80.1 vvc_w_avg_12_8x16_c: 4071.6 vvc_w_avg_12_8x16_avx2: 139.3 vvc_w_avg_12_8x32_c: 4485.1 vvc_w_avg_12_8x32_avx2: 250.6 vvc_w_avg_12_8x64_c: 8404.8 vvc_w_avg_12_8x64_avx2: 735.8 vvc_w_avg_12_8x128_c: 35679.8 vvc_w_avg_12_8x128_avx2: 1252.6 vvc_w_avg_12_16x2_c: 1114.8 vvc_w_avg_12_16x2_avx2: 46.6 vvc_w_avg_12_16x4_c: 2240.1 vvc_w_avg_12_16x4_avx2: 62.6 vvc_w_avg_12_16x8_c: 13174.6 vvc_w_avg_12_16x8_avx2: 88.6 vvc_w_avg_12_16x16_c: 5334.6 vvc_w_avg_12_16x16_avx2: 144.3 vvc_w_avg_12_16x32_c: 8378.1 vvc_w_avg_12_16x32_avx2: 234.6 vvc_w_avg_12_16x64_c: 21300.8 vvc_w_avg_12_16x64_avx2: 761.8 vvc_w_avg_12_16x128_c: 32786.8 vvc_w_avg_12_16x128_avx2: 1432.8 vvc_w_avg_12_32x2_c: 2154.3 vvc_w_avg_12_32x2_avx2: 61.1 vvc_w_avg_12_32x4_c: 4299.8 vvc_w_avg_12_32x4_avx2: 83.1 vvc_w_avg_12_32x8_c: 7964.8 vvc_w_avg_12_32x8_avx2: 132.6 vvc_w_avg_12_32x16_c: 13321.6 vvc_w_avg_12_32x16_avx2: 234.6 vvc_w_avg_12_32x32_c: 21149.3 vvc_w_avg_12_32x32_avx2: 433.3 vvc_w_avg_12_32x64_c: 43666.6 vvc_w_avg_12_32x64_avx2: 876.6 vvc_w_avg_12_32x128_c: 83189.8 vvc_w_avg_12_32x128_avx2: 1756.6 vvc_w_avg_12_64x2_c: 3829.8 vvc_w_avg_12_64x2_avx2: 83.1 vvc_w_avg_12_64x4_c: 8588.1 
vvc_w_avg_12_64x4_avx2: 127.1 vvc_w_avg_12_64x8_c: 17027.6 vvc_w_avg_12_64x8_avx2: 310.6 vvc_w_avg_12_64x16_c: 29797.8 vvc_w_avg_12_64x16_avx2: 415.6 vvc_w_avg_12_64x32_c: 43854.3 vvc_w_avg_12_64x32_avx2: 773.3 vvc_w_avg_12_64x64_c: 137767.3 vvc_w_avg_12_64x64_avx2: 1608.6 vvc_w_avg_12_64x128_c: 316428.3 vvc_w_avg_12_64x128_avx2: 3249.8 vvc_w_avg_12_128x2_c: 8824.6 vvc_w_avg_12_128x2_avx2: 130.3 vvc_w_avg_12_128x4_c: 17173.6 vvc_w_avg_12_128x4_avx2: 219.3 vvc_w_avg_12_128x8_c: 21997.8 vvc_w_avg_12_128x8_avx2: 397.3 vvc_w_avg_12_128x16_c: 43553.8 vvc_w_avg_12_128x16_avx2: 790.1 vvc_w_avg_12_128x32_c: 89792.1 vvc_w_avg_12_128x32_avx2: 1497.6 vvc_w_avg_12_128x64_c: 226573.3 vvc_w_avg_12_128x64_avx2: 3153.1 vvc_w_avg_12_128x128_c: 332090.1 vvc_w_avg_12_128x128_avx2: 6499.6 Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
2024-01-23 18:17:10 +00:00
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
{
#if ARCH_X86_64
const int cpu_flags = av_get_cpu_flags();
if (bd == 8) {
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(8);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
MC_LINKS_AVX2(8);
}
} else if (bd == 10) {
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(10);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
MC_LINKS_AVX2(10);
MC_LINKS_16BPC_AVX2(10);
}
} else if (bd == 12) {
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(12);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
MC_LINKS_AVX2(12);
MC_LINKS_16BPC_AVX2(12);
}
}
avcodec/x86/vvc: add avg and avg_w AVX2 optimizations The avg/avg_w is based on dav1d. See https://code.videolan.org/videolan/dav1d/-/blob/master/src/x86/mc_avx2.asm vvc_avg_8_2x2_c: 71.6 vvc_avg_8_2x2_avx2: 26.8 vvc_avg_8_2x4_c: 140.8 vvc_avg_8_2x4_avx2: 34.6 vvc_avg_8_2x8_c: 410.3 vvc_avg_8_2x8_avx2: 41.3 vvc_avg_8_2x16_c: 769.3 vvc_avg_8_2x16_avx2: 60.3 vvc_avg_8_2x32_c: 1669.6 vvc_avg_8_2x32_avx2: 105.1 vvc_avg_8_2x64_c: 1978.3 vvc_avg_8_2x64_avx2: 425.8 vvc_avg_8_2x128_c: 6536.8 vvc_avg_8_2x128_avx2: 1315.1 vvc_avg_8_4x2_c: 155.6 vvc_avg_8_4x2_avx2: 26.1 vvc_avg_8_4x4_c: 250.3 vvc_avg_8_4x4_avx2: 31.3 vvc_avg_8_4x8_c: 831.8 vvc_avg_8_4x8_avx2: 41.3 vvc_avg_8_4x16_c: 1461.1 vvc_avg_8_4x16_avx2: 57.1 vvc_avg_8_4x32_c: 2821.6 vvc_avg_8_4x32_avx2: 105.1 vvc_avg_8_4x64_c: 3615.8 vvc_avg_8_4x64_avx2: 412.6 vvc_avg_8_4x128_c: 11962.6 vvc_avg_8_4x128_avx2: 1274.3 vvc_avg_8_8x2_c: 215.8 vvc_avg_8_8x2_avx2: 29.1 vvc_avg_8_8x4_c: 430.6 vvc_avg_8_8x4_avx2: 37.6 vvc_avg_8_8x8_c: 1463.3 vvc_avg_8_8x8_avx2: 51.8 vvc_avg_8_8x16_c: 2630.1 vvc_avg_8_8x16_avx2: 97.6 vvc_avg_8_8x32_c: 5813.8 vvc_avg_8_8x32_avx2: 196.6 vvc_avg_8_8x64_c: 6687.3 vvc_avg_8_8x64_avx2: 487.8 vvc_avg_8_8x128_c: 13178.6 vvc_avg_8_8x128_avx2: 1290.6 vvc_avg_8_16x2_c: 443.8 vvc_avg_8_16x2_avx2: 28.3 vvc_avg_8_16x4_c: 1253.3 vvc_avg_8_16x4_avx2: 32.1 vvc_avg_8_16x8_c: 2236.3 vvc_avg_8_16x8_avx2: 44.3 vvc_avg_8_16x16_c: 5127.8 vvc_avg_8_16x16_avx2: 63.3 vvc_avg_8_16x32_c: 6573.3 vvc_avg_8_16x32_avx2: 223.6 vvc_avg_8_16x64_c: 30311.8 vvc_avg_8_16x64_avx2: 437.8 vvc_avg_8_16x128_c: 25693.3 vvc_avg_8_16x128_avx2: 1266.8 vvc_avg_8_32x2_c: 954.6 vvc_avg_8_32x2_avx2: 32.1 vvc_avg_8_32x4_c: 2359.6 vvc_avg_8_32x4_avx2: 39.6 vvc_avg_8_32x8_c: 5703.6 vvc_avg_8_32x8_avx2: 57.1 vvc_avg_8_32x16_c: 9967.6 vvc_avg_8_32x16_avx2: 107.1 vvc_avg_8_32x32_c: 21327.6 vvc_avg_8_32x32_avx2: 272.6 vvc_avg_8_32x64_c: 39240.8 vvc_avg_8_32x64_avx2: 529.6 vvc_avg_8_32x128_c: 52580.8 vvc_avg_8_32x128_avx2: 1338.8 vvc_avg_8_64x2_c: 1647.3 
vvc_avg_8_64x2_avx2: 38.8 vvc_avg_8_64x4_c: 5130.1 vvc_avg_8_64x4_avx2: 58.8 vvc_avg_8_64x8_c: 6529.3 vvc_avg_8_64x8_avx2: 88.3 vvc_avg_8_64x16_c: 19913.6 vvc_avg_8_64x16_avx2: 162.3 vvc_avg_8_64x32_c: 39360.8 vvc_avg_8_64x32_avx2: 295.8 vvc_avg_8_64x64_c: 49658.3 vvc_avg_8_64x64_avx2: 784.1 vvc_avg_8_64x128_c: 108513.1 vvc_avg_8_64x128_avx2: 1977.1 vvc_avg_8_128x2_c: 3226.1 vvc_avg_8_128x2_avx2: 61.1 vvc_avg_8_128x4_c: 10280.3 vvc_avg_8_128x4_avx2: 94.6 vvc_avg_8_128x8_c: 18079.3 vvc_avg_8_128x8_avx2: 155.3 vvc_avg_8_128x16_c: 45121.8 vvc_avg_8_128x16_avx2: 285.3 vvc_avg_8_128x32_c: 48651.8 vvc_avg_8_128x32_avx2: 581.6 vvc_avg_8_128x64_c: 165078.6 vvc_avg_8_128x64_avx2: 1942.8 vvc_avg_8_128x128_c: 339103.1 vvc_avg_8_128x128_avx2: 4332.6 vvc_avg_10_2x2_c: 144.3 vvc_avg_10_2x2_avx2: 26.8 vvc_avg_10_2x4_c: 142.6 vvc_avg_10_2x4_avx2: 45.3 vvc_avg_10_2x8_c: 478.1 vvc_avg_10_2x8_avx2: 38.1 vvc_avg_10_2x16_c: 518.3 vvc_avg_10_2x16_avx2: 58.1 vvc_avg_10_2x32_c: 2059.8 vvc_avg_10_2x32_avx2: 93.1 vvc_avg_10_2x64_c: 2383.8 vvc_avg_10_2x64_avx2: 714.8 vvc_avg_10_2x128_c: 4498.3 vvc_avg_10_2x128_avx2: 1466.3 vvc_avg_10_4x2_c: 228.6 vvc_avg_10_4x2_avx2: 26.8 vvc_avg_10_4x4_c: 378.3 vvc_avg_10_4x4_avx2: 30.6 vvc_avg_10_4x8_c: 866.8 vvc_avg_10_4x8_avx2: 44.6 vvc_avg_10_4x16_c: 1018.1 vvc_avg_10_4x16_avx2: 58.1 vvc_avg_10_4x32_c: 3590.8 vvc_avg_10_4x32_avx2: 128.8 vvc_avg_10_4x64_c: 4200.8 vvc_avg_10_4x64_avx2: 663.6 vvc_avg_10_4x128_c: 8450.8 vvc_avg_10_4x128_avx2: 1531.8 vvc_avg_10_8x2_c: 369.3 vvc_avg_10_8x2_avx2: 28.3 vvc_avg_10_8x4_c: 513.8 vvc_avg_10_8x4_avx2: 32.1 vvc_avg_10_8x8_c: 1720.3 vvc_avg_10_8x8_avx2: 49.1 vvc_avg_10_8x16_c: 1894.8 vvc_avg_10_8x16_avx2: 71.6 vvc_avg_10_8x32_c: 3931.3 vvc_avg_10_8x32_avx2: 148.1 vvc_avg_10_8x64_c: 7964.3 vvc_avg_10_8x64_avx2: 613.1 vvc_avg_10_8x128_c: 15540.1 vvc_avg_10_8x128_avx2: 1585.1 vvc_avg_10_16x2_c: 877.3 vvc_avg_10_16x2_avx2: 27.6 vvc_avg_10_16x4_c: 955.8 vvc_avg_10_16x4_avx2: 29.8 vvc_avg_10_16x8_c: 3419.6 
vvc_avg_10_16x8_avx2: 62.6 vvc_avg_10_16x16_c: 3826.8 vvc_avg_10_16x16_avx2: 54.3 vvc_avg_10_16x32_c: 7655.3 vvc_avg_10_16x32_avx2: 86.3 vvc_avg_10_16x64_c: 30011.1 vvc_avg_10_16x64_avx2: 692.6 vvc_avg_10_16x128_c: 47894.8 vvc_avg_10_16x128_avx2: 1580.3 vvc_avg_10_32x2_c: 944.3 vvc_avg_10_32x2_avx2: 29.8 vvc_avg_10_32x4_c: 2022.6 vvc_avg_10_32x4_avx2: 35.1 vvc_avg_10_32x8_c: 6148.8 vvc_avg_10_32x8_avx2: 51.3 vvc_avg_10_32x16_c: 12601.6 vvc_avg_10_32x16_avx2: 70.8 vvc_avg_10_32x32_c: 15958.6 vvc_avg_10_32x32_avx2: 124.3 vvc_avg_10_32x64_c: 31784.6 vvc_avg_10_32x64_avx2: 757.3 vvc_avg_10_32x128_c: 63892.8 vvc_avg_10_32x128_avx2: 1711.3 vvc_avg_10_64x2_c: 1890.8 vvc_avg_10_64x2_avx2: 34.3 vvc_avg_10_64x4_c: 6267.3 vvc_avg_10_64x4_avx2: 42.6 vvc_avg_10_64x8_c: 12778.1 vvc_avg_10_64x8_avx2: 67.8 vvc_avg_10_64x16_c: 22304.3 vvc_avg_10_64x16_avx2: 116.8 vvc_avg_10_64x32_c: 30777.1 vvc_avg_10_64x32_avx2: 201.1 vvc_avg_10_64x64_c: 60169.1 vvc_avg_10_64x64_avx2: 1454.3 vvc_avg_10_64x128_c: 124392.8 vvc_avg_10_64x128_avx2: 3648.6 vvc_avg_10_128x2_c: 3650.1 vvc_avg_10_128x2_avx2: 41.1 vvc_avg_10_128x4_c: 22887.8 vvc_avg_10_128x4_avx2: 64.1 vvc_avg_10_128x8_c: 14622.6 vvc_avg_10_128x8_avx2: 111.6 vvc_avg_10_128x16_c: 62207.6 vvc_avg_10_128x16_avx2: 186.3 vvc_avg_10_128x32_c: 59761.3 vvc_avg_10_128x32_avx2: 374.6 vvc_avg_10_128x64_c: 117504.3 vvc_avg_10_128x64_avx2: 2684.6 vvc_avg_10_128x128_c: 236767.6 vvc_avg_10_128x128_avx2: 15278.1 vvc_avg_12_2x2_c: 78.6 vvc_avg_12_2x2_avx2: 26.1 vvc_avg_12_2x4_c: 254.1 vvc_avg_12_2x4_avx2: 30.6 vvc_avg_12_2x8_c: 261.8 vvc_avg_12_2x8_avx2: 39.1 vvc_avg_12_2x16_c: 527.6 vvc_avg_12_2x16_avx2: 57.3 vvc_avg_12_2x32_c: 1089.1 vvc_avg_12_2x32_avx2: 93.8 vvc_avg_12_2x64_c: 2337.6 vvc_avg_12_2x64_avx2: 707.1 vvc_avg_12_2x128_c: 4582.1 vvc_avg_12_2x128_avx2: 1414.6 vvc_avg_12_4x2_c: 129.6 vvc_avg_12_4x2_avx2: 26.8 vvc_avg_12_4x4_c: 427.3 vvc_avg_12_4x4_avx2: 30.6 vvc_avg_12_4x8_c: 529.6 vvc_avg_12_4x8_avx2: 36.6 vvc_avg_12_4x16_c: 1022.1 
vvc_avg_12_4x16_avx2: 57.3 vvc_avg_12_4x32_c: 1987.6 vvc_avg_12_4x32_avx2: 84.3 vvc_avg_12_4x64_c: 4147.6 vvc_avg_12_4x64_avx2: 706.3 vvc_avg_12_4x128_c: 8469.3 vvc_avg_12_4x128_avx2: 1448.3 vvc_avg_12_8x2_c: 253.6 vvc_avg_12_8x2_avx2: 27.6 vvc_avg_12_8x4_c: 836.3 vvc_avg_12_8x4_avx2: 32.1 vvc_avg_12_8x8_c: 1074.6 vvc_avg_12_8x8_avx2: 45.1 vvc_avg_12_8x16_c: 3616.8 vvc_avg_12_8x16_avx2: 71.6 vvc_avg_12_8x32_c: 3823.6 vvc_avg_12_8x32_avx2: 140.1 vvc_avg_12_8x64_c: 7764.8 vvc_avg_12_8x64_avx2: 656.1 vvc_avg_12_8x128_c: 15896.1 vvc_avg_12_8x128_avx2: 1232.8 vvc_avg_12_16x2_c: 462.1 vvc_avg_12_16x2_avx2: 26.8 vvc_avg_12_16x4_c: 1732.1 vvc_avg_12_16x4_avx2: 29.1 vvc_avg_12_16x8_c: 2097.6 vvc_avg_12_16x8_avx2: 62.6 vvc_avg_12_16x16_c: 6753.1 vvc_avg_12_16x16_avx2: 47.8 vvc_avg_12_16x32_c: 7373.1 vvc_avg_12_16x32_avx2: 80.8 vvc_avg_12_16x64_c: 15046.3 vvc_avg_12_16x64_avx2: 621.1 vvc_avg_12_16x128_c: 52574.6 vvc_avg_12_16x128_avx2: 1417.1 vvc_avg_12_32x2_c: 1712.1 vvc_avg_12_32x2_avx2: 29.8 vvc_avg_12_32x4_c: 2036.8 vvc_avg_12_32x4_avx2: 37.6 vvc_avg_12_32x8_c: 4017.6 vvc_avg_12_32x8_avx2: 44.1 vvc_avg_12_32x16_c: 8018.6 vvc_avg_12_32x16_avx2: 70.8 vvc_avg_12_32x32_c: 15637.6 vvc_avg_12_32x32_avx2: 124.3 vvc_avg_12_32x64_c: 31143.3 vvc_avg_12_32x64_avx2: 830.3 vvc_avg_12_32x128_c: 75706.8 vvc_avg_12_32x128_avx2: 1604.8 vvc_avg_12_64x2_c: 3230.3 vvc_avg_12_64x2_avx2: 33.6 vvc_avg_12_64x4_c: 4139.6 vvc_avg_12_64x4_avx2: 45.1 vvc_avg_12_64x8_c: 8201.6 vvc_avg_12_64x8_avx2: 67.1 vvc_avg_12_64x16_c: 25632.3 vvc_avg_12_64x16_avx2: 110.3 vvc_avg_12_64x32_c: 30744.3 vvc_avg_12_64x32_avx2: 200.3 vvc_avg_12_64x64_c: 105554.8 vvc_avg_12_64x64_avx2: 1325.6 vvc_avg_12_64x128_c: 235254.3 vvc_avg_12_64x128_avx2: 3132.6 vvc_avg_12_128x2_c: 6194.3 vvc_avg_12_128x2_avx2: 55.1 vvc_avg_12_128x4_c: 7583.8 vvc_avg_12_128x4_avx2: 79.3 vvc_avg_12_128x8_c: 14635.6 vvc_avg_12_128x8_avx2: 104.3 vvc_avg_12_128x16_c: 29270.8 vvc_avg_12_128x16_avx2: 194.3 vvc_avg_12_128x32_c: 60113.6 
vvc_avg_12_128x32_avx2: 346.3 vvc_avg_12_128x64_c: 197030.3 vvc_avg_12_128x64_avx2: 2779.6 vvc_avg_12_128x128_c: 432809.6 vvc_avg_12_128x128_avx2: 5513.3 vvc_w_avg_8_2x2_c: 84.3 vvc_w_avg_8_2x2_avx2: 42.6 vvc_w_avg_8_2x4_c: 156.3 vvc_w_avg_8_2x4_avx2: 58.8 vvc_w_avg_8_2x8_c: 310.6 vvc_w_avg_8_2x8_avx2: 73.1 vvc_w_avg_8_2x16_c: 942.1 vvc_w_avg_8_2x16_avx2: 113.3 vvc_w_avg_8_2x32_c: 1098.8 vvc_w_avg_8_2x32_avx2: 202.6 vvc_w_avg_8_2x64_c: 2414.3 vvc_w_avg_8_2x64_avx2: 467.6 vvc_w_avg_8_2x128_c: 4763.8 vvc_w_avg_8_2x128_avx2: 1333.1 vvc_w_avg_8_4x2_c: 140.1 vvc_w_avg_8_4x2_avx2: 49.8 vvc_w_avg_8_4x4_c: 276.3 vvc_w_avg_8_4x4_avx2: 58.1 vvc_w_avg_8_4x8_c: 524.3 vvc_w_avg_8_4x8_avx2: 72.3 vvc_w_avg_8_4x16_c: 1108.1 vvc_w_avg_8_4x16_avx2: 111.8 vvc_w_avg_8_4x32_c: 2149.8 vvc_w_avg_8_4x32_avx2: 199.6 vvc_w_avg_8_4x64_c: 12288.1 vvc_w_avg_8_4x64_avx2: 509.3 vvc_w_avg_8_4x128_c: 8398.6 vvc_w_avg_8_4x128_avx2: 1319.6 vvc_w_avg_8_8x2_c: 271.1 vvc_w_avg_8_8x2_avx2: 44.1 vvc_w_avg_8_8x4_c: 503.3 vvc_w_avg_8_8x4_avx2: 61.8 vvc_w_avg_8_8x8_c: 1031.1 vvc_w_avg_8_8x8_avx2: 93.8 vvc_w_avg_8_8x16_c: 2009.8 vvc_w_avg_8_8x16_avx2: 163.1 vvc_w_avg_8_8x32_c: 4161.3 vvc_w_avg_8_8x32_avx2: 292.1 vvc_w_avg_8_8x64_c: 7940.6 vvc_w_avg_8_8x64_avx2: 592.1 vvc_w_avg_8_8x128_c: 16802.3 vvc_w_avg_8_8x128_avx2: 1287.6 vvc_w_avg_8_16x2_c: 762.6 vvc_w_avg_8_16x2_avx2: 53.6 vvc_w_avg_8_16x4_c: 1486.3 vvc_w_avg_8_16x4_avx2: 67.1 vvc_w_avg_8_16x8_c: 1907.8 vvc_w_avg_8_16x8_avx2: 96.8 vvc_w_avg_8_16x16_c: 3883.6 vvc_w_avg_8_16x16_avx2: 151.3 vvc_w_avg_8_16x32_c: 7974.8 vvc_w_avg_8_16x32_avx2: 285.8 vvc_w_avg_8_16x64_c: 25160.6 vvc_w_avg_8_16x64_avx2: 589.8 vvc_w_avg_8_16x128_c: 58328.1 vvc_w_avg_8_16x128_avx2: 1169.8 vvc_w_avg_8_32x2_c: 1009.1 vvc_w_avg_8_32x2_avx2: 65.6 vvc_w_avg_8_32x4_c: 2091.1 vvc_w_avg_8_32x4_avx2: 96.8 vvc_w_avg_8_32x8_c: 3997.8 vvc_w_avg_8_32x8_avx2: 156.3 vvc_w_avg_8_32x16_c: 8216.8 vvc_w_avg_8_32x16_avx2: 269.6 vvc_w_avg_8_32x32_c: 21746.1 vvc_w_avg_8_32x32_avx2: 635.3 
vvc_w_avg_8_32x64_c: 31564.8 vvc_w_avg_8_32x64_avx2: 1010.6 vvc_w_avg_8_32x128_c: 114373.3 vvc_w_avg_8_32x128_avx2: 2013.6 vvc_w_avg_8_64x2_c: 2067.3 vvc_w_avg_8_64x2_avx2: 97.6 vvc_w_avg_8_64x4_c: 3901.1 vvc_w_avg_8_64x4_avx2: 154.8 vvc_w_avg_8_64x8_c: 7911.6 vvc_w_avg_8_64x8_avx2: 268.8 vvc_w_avg_8_64x16_c: 16508.8 vvc_w_avg_8_64x16_avx2: 501.8 vvc_w_avg_8_64x32_c: 38770.3 vvc_w_avg_8_64x32_avx2: 1287.6 vvc_w_avg_8_64x64_c: 110350.6 vvc_w_avg_8_64x64_avx2: 1890.8 vvc_w_avg_8_64x128_c: 141354.6 vvc_w_avg_8_64x128_avx2: 3839.6 vvc_w_avg_8_128x2_c: 7012.1 vvc_w_avg_8_128x2_avx2: 159.3 vvc_w_avg_8_128x4_c: 8146.8 vvc_w_avg_8_128x4_avx2: 272.6 vvc_w_avg_8_128x8_c: 24596.8 vvc_w_avg_8_128x8_avx2: 501.1 vvc_w_avg_8_128x16_c: 35918.1 vvc_w_avg_8_128x16_avx2: 948.8 vvc_w_avg_8_128x32_c: 68799.6 vvc_w_avg_8_128x32_avx2: 1963.1 vvc_w_avg_8_128x64_c: 133862.1 vvc_w_avg_8_128x64_avx2: 3833.6 vvc_w_avg_8_128x128_c: 348427.8 vvc_w_avg_8_128x128_avx2: 7682.8 vvc_w_avg_10_2x2_c: 118.6 vvc_w_avg_10_2x2_avx2: 73.1 vvc_w_avg_10_2x4_c: 189.1 vvc_w_avg_10_2x4_avx2: 89.3 vvc_w_avg_10_2x8_c: 382.8 vvc_w_avg_10_2x8_avx2: 179.8 vvc_w_avg_10_2x16_c: 658.3 vvc_w_avg_10_2x16_avx2: 185.1 vvc_w_avg_10_2x32_c: 1409.3 vvc_w_avg_10_2x32_avx2: 290.8 vvc_w_avg_10_2x64_c: 2906.8 vvc_w_avg_10_2x64_avx2: 793.1 vvc_w_avg_10_2x128_c: 6292.6 vvc_w_avg_10_2x128_avx2: 1696.8 vvc_w_avg_10_4x2_c: 178.8 vvc_w_avg_10_4x2_avx2: 80.1 vvc_w_avg_10_4x4_c: 581.6 vvc_w_avg_10_4x4_avx2: 97.6 vvc_w_avg_10_4x8_c: 693.3 vvc_w_avg_10_4x8_avx2: 128.1 vvc_w_avg_10_4x16_c: 1436.6 vvc_w_avg_10_4x16_avx2: 179.8 vvc_w_avg_10_4x32_c: 2409.1 vvc_w_avg_10_4x32_avx2: 292.3 vvc_w_avg_10_4x64_c: 4925.3 vvc_w_avg_10_4x64_avx2: 746.1 vvc_w_avg_10_4x128_c: 10664.6 vvc_w_avg_10_4x128_avx2: 1647.6 vvc_w_avg_10_8x2_c: 359.3 vvc_w_avg_10_8x2_avx2: 80.1 vvc_w_avg_10_8x4_c: 925.6 vvc_w_avg_10_8x4_avx2: 97.6 vvc_w_avg_10_8x8_c: 1360.6 vvc_w_avg_10_8x8_avx2: 121.8 vvc_w_avg_10_8x16_c: 3490.3 vvc_w_avg_10_8x16_avx2: 203.3 vvc_w_avg_10_8x32_c: 
5266.1 vvc_w_avg_10_8x32_avx2: 325.8 vvc_w_avg_10_8x64_c: 11127.1 vvc_w_avg_10_8x64_avx2: 747.8 vvc_w_avg_10_8x128_c: 31058.3 vvc_w_avg_10_8x128_avx2: 1424.6 vvc_w_avg_10_16x2_c: 624.8 vvc_w_avg_10_16x2_avx2: 84.6 vvc_w_avg_10_16x4_c: 1389.6 vvc_w_avg_10_16x4_avx2: 109.1 vvc_w_avg_10_16x8_c: 2688.3 vvc_w_avg_10_16x8_avx2: 137.1 vvc_w_avg_10_16x16_c: 5387.1 vvc_w_avg_10_16x16_avx2: 224.6 vvc_w_avg_10_16x32_c: 10776.3 vvc_w_avg_10_16x32_avx2: 312.1 vvc_w_avg_10_16x64_c: 18069.1 vvc_w_avg_10_16x64_avx2: 858.6 vvc_w_avg_10_16x128_c: 43460.3 vvc_w_avg_10_16x128_avx2: 1411.6 vvc_w_avg_10_32x2_c: 1232.8 vvc_w_avg_10_32x2_avx2: 99.1 vvc_w_avg_10_32x4_c: 4017.6 vvc_w_avg_10_32x4_avx2: 134.1 vvc_w_avg_10_32x8_c: 9306.3 vvc_w_avg_10_32x8_avx2: 208.1 vvc_w_avg_10_32x16_c: 8424.6 vvc_w_avg_10_32x16_avx2: 349.3 vvc_w_avg_10_32x32_c: 20787.8 vvc_w_avg_10_32x32_avx2: 655.3 vvc_w_avg_10_32x64_c: 40972.1 vvc_w_avg_10_32x64_avx2: 904.8 vvc_w_avg_10_32x128_c: 85670.3 vvc_w_avg_10_32x128_avx2: 1751.6 vvc_w_avg_10_64x2_c: 2454.1 vvc_w_avg_10_64x2_avx2: 132.6 vvc_w_avg_10_64x4_c: 5012.6 vvc_w_avg_10_64x4_avx2: 215.6 vvc_w_avg_10_64x8_c: 10811.3 vvc_w_avg_10_64x8_avx2: 361.1 vvc_w_avg_10_64x16_c: 33349.1 vvc_w_avg_10_64x16_avx2: 904.1 vvc_w_avg_10_64x32_c: 41892.3 vvc_w_avg_10_64x32_avx2: 1220.6 vvc_w_avg_10_64x64_c: 66983.3 vvc_w_avg_10_64x64_avx2: 2622.1 vvc_w_avg_10_64x128_c: 246508.8 vvc_w_avg_10_64x128_avx2: 3316.8 vvc_w_avg_10_128x2_c: 7791.6 vvc_w_avg_10_128x2_avx2: 198.8 vvc_w_avg_10_128x4_c: 10534.3 vvc_w_avg_10_128x4_avx2: 337.3 vvc_w_avg_10_128x8_c: 21142.3 vvc_w_avg_10_128x8_avx2: 614.8 vvc_w_avg_10_128x16_c: 40968.6 vvc_w_avg_10_128x16_avx2: 1160.6 vvc_w_avg_10_128x32_c: 113043.3 vvc_w_avg_10_128x32_avx2: 1644.6 vvc_w_avg_10_128x64_c: 230658.3 vvc_w_avg_10_128x64_avx2: 5065.3 vvc_w_avg_10_128x128_c: 335236.3 vvc_w_avg_10_128x128_avx2: 6450.3 vvc_w_avg_12_2x2_c: 185.3 vvc_w_avg_12_2x2_avx2: 43.6 vvc_w_avg_12_2x4_c: 340.3 vvc_w_avg_12_2x4_avx2: 55.8 vvc_w_avg_12_2x8_c: 632.3 
vvc_w_avg_12_2x8_avx2: 70.1 vvc_w_avg_12_2x16_c: 728.3 vvc_w_avg_12_2x16_avx2: 108.1 vvc_w_avg_12_2x32_c: 1392.6 vvc_w_avg_12_2x32_avx2: 176.8 vvc_w_avg_12_2x64_c: 2618.3 vvc_w_avg_12_2x64_avx2: 757.3 vvc_w_avg_12_2x128_c: 6408.8 vvc_w_avg_12_2x128_avx2: 1435.1 vvc_w_avg_12_4x2_c: 349.3 vvc_w_avg_12_4x2_avx2: 44.3 vvc_w_avg_12_4x4_c: 607.1 vvc_w_avg_12_4x4_avx2: 52.6 vvc_w_avg_12_4x8_c: 1134.8 vvc_w_avg_12_4x8_avx2: 70.1 vvc_w_avg_12_4x16_c: 1378.1 vvc_w_avg_12_4x16_avx2: 115.3 vvc_w_avg_12_4x32_c: 2599.3 vvc_w_avg_12_4x32_avx2: 174.3 vvc_w_avg_12_4x64_c: 4474.8 vvc_w_avg_12_4x64_avx2: 656.1 vvc_w_avg_12_4x128_c: 11319.6 vvc_w_avg_12_4x128_avx2: 1373.1 vvc_w_avg_12_8x2_c: 595.8 vvc_w_avg_12_8x2_avx2: 44.3 vvc_w_avg_12_8x4_c: 1164.3 vvc_w_avg_12_8x4_avx2: 56.6 vvc_w_avg_12_8x8_c: 2019.6 vvc_w_avg_12_8x8_avx2: 80.1 vvc_w_avg_12_8x16_c: 4071.6 vvc_w_avg_12_8x16_avx2: 139.3 vvc_w_avg_12_8x32_c: 4485.1 vvc_w_avg_12_8x32_avx2: 250.6 vvc_w_avg_12_8x64_c: 8404.8 vvc_w_avg_12_8x64_avx2: 735.8 vvc_w_avg_12_8x128_c: 35679.8 vvc_w_avg_12_8x128_avx2: 1252.6 vvc_w_avg_12_16x2_c: 1114.8 vvc_w_avg_12_16x2_avx2: 46.6 vvc_w_avg_12_16x4_c: 2240.1 vvc_w_avg_12_16x4_avx2: 62.6 vvc_w_avg_12_16x8_c: 13174.6 vvc_w_avg_12_16x8_avx2: 88.6 vvc_w_avg_12_16x16_c: 5334.6 vvc_w_avg_12_16x16_avx2: 144.3 vvc_w_avg_12_16x32_c: 8378.1 vvc_w_avg_12_16x32_avx2: 234.6 vvc_w_avg_12_16x64_c: 21300.8 vvc_w_avg_12_16x64_avx2: 761.8 vvc_w_avg_12_16x128_c: 32786.8 vvc_w_avg_12_16x128_avx2: 1432.8 vvc_w_avg_12_32x2_c: 2154.3 vvc_w_avg_12_32x2_avx2: 61.1 vvc_w_avg_12_32x4_c: 4299.8 vvc_w_avg_12_32x4_avx2: 83.1 vvc_w_avg_12_32x8_c: 7964.8 vvc_w_avg_12_32x8_avx2: 132.6 vvc_w_avg_12_32x16_c: 13321.6 vvc_w_avg_12_32x16_avx2: 234.6 vvc_w_avg_12_32x32_c: 21149.3 vvc_w_avg_12_32x32_avx2: 433.3 vvc_w_avg_12_32x64_c: 43666.6 vvc_w_avg_12_32x64_avx2: 876.6 vvc_w_avg_12_32x128_c: 83189.8 vvc_w_avg_12_32x128_avx2: 1756.6 vvc_w_avg_12_64x2_c: 3829.8 vvc_w_avg_12_64x2_avx2: 83.1 vvc_w_avg_12_64x4_c: 8588.1 
vvc_w_avg_12_64x4_avx2: 127.1 vvc_w_avg_12_64x8_c: 17027.6 vvc_w_avg_12_64x8_avx2: 310.6 vvc_w_avg_12_64x16_c: 29797.8 vvc_w_avg_12_64x16_avx2: 415.6 vvc_w_avg_12_64x32_c: 43854.3 vvc_w_avg_12_64x32_avx2: 773.3 vvc_w_avg_12_64x64_c: 137767.3 vvc_w_avg_12_64x64_avx2: 1608.6 vvc_w_avg_12_64x128_c: 316428.3 vvc_w_avg_12_64x128_avx2: 3249.8 vvc_w_avg_12_128x2_c: 8824.6 vvc_w_avg_12_128x2_avx2: 130.3 vvc_w_avg_12_128x4_c: 17173.6 vvc_w_avg_12_128x4_avx2: 219.3 vvc_w_avg_12_128x8_c: 21997.8 vvc_w_avg_12_128x8_avx2: 397.3 vvc_w_avg_12_128x16_c: 43553.8 vvc_w_avg_12_128x16_avx2: 790.1 vvc_w_avg_12_128x32_c: 89792.1 vvc_w_avg_12_128x32_avx2: 1497.6 vvc_w_avg_12_128x64_c: 226573.3 vvc_w_avg_12_128x64_avx2: 3153.1 vvc_w_avg_12_128x128_c: 332090.1 vvc_w_avg_12_128x128_avx2: 6499.6 Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
2024-01-23 18:17:10 +00:00
if (EXTERNAL_AVX2(cpu_flags)) {
switch (bd) {
case 8:
AVG_INIT(8, avx2);
break;
case 10:
AVG_INIT(10, avx2);
break;
case 12:
AVG_INIT(12, avx2);
break;
default:
break;
avcodec/x86/vvc: add avg and avg_w AVX2 optimizations The avg/avg_w is based on dav1d. See https://code.videolan.org/videolan/dav1d/-/blob/master/src/x86/mc_avx2.asm vvc_avg_8_2x2_c: 71.6 vvc_avg_8_2x2_avx2: 26.8 vvc_avg_8_2x4_c: 140.8 vvc_avg_8_2x4_avx2: 34.6 vvc_avg_8_2x8_c: 410.3 vvc_avg_8_2x8_avx2: 41.3 vvc_avg_8_2x16_c: 769.3 vvc_avg_8_2x16_avx2: 60.3 vvc_avg_8_2x32_c: 1669.6 vvc_avg_8_2x32_avx2: 105.1 vvc_avg_8_2x64_c: 1978.3 vvc_avg_8_2x64_avx2: 425.8 vvc_avg_8_2x128_c: 6536.8 vvc_avg_8_2x128_avx2: 1315.1 vvc_avg_8_4x2_c: 155.6 vvc_avg_8_4x2_avx2: 26.1 vvc_avg_8_4x4_c: 250.3 vvc_avg_8_4x4_avx2: 31.3 vvc_avg_8_4x8_c: 831.8 vvc_avg_8_4x8_avx2: 41.3 vvc_avg_8_4x16_c: 1461.1 vvc_avg_8_4x16_avx2: 57.1 vvc_avg_8_4x32_c: 2821.6 vvc_avg_8_4x32_avx2: 105.1 vvc_avg_8_4x64_c: 3615.8 vvc_avg_8_4x64_avx2: 412.6 vvc_avg_8_4x128_c: 11962.6 vvc_avg_8_4x128_avx2: 1274.3 vvc_avg_8_8x2_c: 215.8 vvc_avg_8_8x2_avx2: 29.1 vvc_avg_8_8x4_c: 430.6 vvc_avg_8_8x4_avx2: 37.6 vvc_avg_8_8x8_c: 1463.3 vvc_avg_8_8x8_avx2: 51.8 vvc_avg_8_8x16_c: 2630.1 vvc_avg_8_8x16_avx2: 97.6 vvc_avg_8_8x32_c: 5813.8 vvc_avg_8_8x32_avx2: 196.6 vvc_avg_8_8x64_c: 6687.3 vvc_avg_8_8x64_avx2: 487.8 vvc_avg_8_8x128_c: 13178.6 vvc_avg_8_8x128_avx2: 1290.6 vvc_avg_8_16x2_c: 443.8 vvc_avg_8_16x2_avx2: 28.3 vvc_avg_8_16x4_c: 1253.3 vvc_avg_8_16x4_avx2: 32.1 vvc_avg_8_16x8_c: 2236.3 vvc_avg_8_16x8_avx2: 44.3 vvc_avg_8_16x16_c: 5127.8 vvc_avg_8_16x16_avx2: 63.3 vvc_avg_8_16x32_c: 6573.3 vvc_avg_8_16x32_avx2: 223.6 vvc_avg_8_16x64_c: 30311.8 vvc_avg_8_16x64_avx2: 437.8 vvc_avg_8_16x128_c: 25693.3 vvc_avg_8_16x128_avx2: 1266.8 vvc_avg_8_32x2_c: 954.6 vvc_avg_8_32x2_avx2: 32.1 vvc_avg_8_32x4_c: 2359.6 vvc_avg_8_32x4_avx2: 39.6 vvc_avg_8_32x8_c: 5703.6 vvc_avg_8_32x8_avx2: 57.1 vvc_avg_8_32x16_c: 9967.6 vvc_avg_8_32x16_avx2: 107.1 vvc_avg_8_32x32_c: 21327.6 vvc_avg_8_32x32_avx2: 272.6 vvc_avg_8_32x64_c: 39240.8 vvc_avg_8_32x64_avx2: 529.6 vvc_avg_8_32x128_c: 52580.8 vvc_avg_8_32x128_avx2: 1338.8 vvc_avg_8_64x2_c: 1647.3 
vvc_avg_8_64x2_avx2: 38.8 vvc_avg_8_64x4_c: 5130.1 vvc_avg_8_64x4_avx2: 58.8 vvc_avg_8_64x8_c: 6529.3 vvc_avg_8_64x8_avx2: 88.3 vvc_avg_8_64x16_c: 19913.6 vvc_avg_8_64x16_avx2: 162.3 vvc_avg_8_64x32_c: 39360.8 vvc_avg_8_64x32_avx2: 295.8 vvc_avg_8_64x64_c: 49658.3 vvc_avg_8_64x64_avx2: 784.1 vvc_avg_8_64x128_c: 108513.1 vvc_avg_8_64x128_avx2: 1977.1 vvc_avg_8_128x2_c: 3226.1 vvc_avg_8_128x2_avx2: 61.1 vvc_avg_8_128x4_c: 10280.3 vvc_avg_8_128x4_avx2: 94.6 vvc_avg_8_128x8_c: 18079.3 vvc_avg_8_128x8_avx2: 155.3 vvc_avg_8_128x16_c: 45121.8 vvc_avg_8_128x16_avx2: 285.3 vvc_avg_8_128x32_c: 48651.8 vvc_avg_8_128x32_avx2: 581.6 vvc_avg_8_128x64_c: 165078.6 vvc_avg_8_128x64_avx2: 1942.8 vvc_avg_8_128x128_c: 339103.1 vvc_avg_8_128x128_avx2: 4332.6 vvc_avg_10_2x2_c: 144.3 vvc_avg_10_2x2_avx2: 26.8 vvc_avg_10_2x4_c: 142.6 vvc_avg_10_2x4_avx2: 45.3 vvc_avg_10_2x8_c: 478.1 vvc_avg_10_2x8_avx2: 38.1 vvc_avg_10_2x16_c: 518.3 vvc_avg_10_2x16_avx2: 58.1 vvc_avg_10_2x32_c: 2059.8 vvc_avg_10_2x32_avx2: 93.1 vvc_avg_10_2x64_c: 2383.8 vvc_avg_10_2x64_avx2: 714.8 vvc_avg_10_2x128_c: 4498.3 vvc_avg_10_2x128_avx2: 1466.3 vvc_avg_10_4x2_c: 228.6 vvc_avg_10_4x2_avx2: 26.8 vvc_avg_10_4x4_c: 378.3 vvc_avg_10_4x4_avx2: 30.6 vvc_avg_10_4x8_c: 866.8 vvc_avg_10_4x8_avx2: 44.6 vvc_avg_10_4x16_c: 1018.1 vvc_avg_10_4x16_avx2: 58.1 vvc_avg_10_4x32_c: 3590.8 vvc_avg_10_4x32_avx2: 128.8 vvc_avg_10_4x64_c: 4200.8 vvc_avg_10_4x64_avx2: 663.6 vvc_avg_10_4x128_c: 8450.8 vvc_avg_10_4x128_avx2: 1531.8 vvc_avg_10_8x2_c: 369.3 vvc_avg_10_8x2_avx2: 28.3 vvc_avg_10_8x4_c: 513.8 vvc_avg_10_8x4_avx2: 32.1 vvc_avg_10_8x8_c: 1720.3 vvc_avg_10_8x8_avx2: 49.1 vvc_avg_10_8x16_c: 1894.8 vvc_avg_10_8x16_avx2: 71.6 vvc_avg_10_8x32_c: 3931.3 vvc_avg_10_8x32_avx2: 148.1 vvc_avg_10_8x64_c: 7964.3 vvc_avg_10_8x64_avx2: 613.1 vvc_avg_10_8x128_c: 15540.1 vvc_avg_10_8x128_avx2: 1585.1 vvc_avg_10_16x2_c: 877.3 vvc_avg_10_16x2_avx2: 27.6 vvc_avg_10_16x4_c: 955.8 vvc_avg_10_16x4_avx2: 29.8 vvc_avg_10_16x8_c: 3419.6 
vvc_avg_10_16x8_avx2: 62.6 vvc_avg_10_16x16_c: 3826.8 vvc_avg_10_16x16_avx2: 54.3 vvc_avg_10_16x32_c: 7655.3 vvc_avg_10_16x32_avx2: 86.3 vvc_avg_10_16x64_c: 30011.1 vvc_avg_10_16x64_avx2: 692.6 vvc_avg_10_16x128_c: 47894.8 vvc_avg_10_16x128_avx2: 1580.3 vvc_avg_10_32x2_c: 944.3 vvc_avg_10_32x2_avx2: 29.8 vvc_avg_10_32x4_c: 2022.6 vvc_avg_10_32x4_avx2: 35.1 vvc_avg_10_32x8_c: 6148.8 vvc_avg_10_32x8_avx2: 51.3 vvc_avg_10_32x16_c: 12601.6 vvc_avg_10_32x16_avx2: 70.8 vvc_avg_10_32x32_c: 15958.6 vvc_avg_10_32x32_avx2: 124.3 vvc_avg_10_32x64_c: 31784.6 vvc_avg_10_32x64_avx2: 757.3 vvc_avg_10_32x128_c: 63892.8 vvc_avg_10_32x128_avx2: 1711.3 vvc_avg_10_64x2_c: 1890.8 vvc_avg_10_64x2_avx2: 34.3 vvc_avg_10_64x4_c: 6267.3 vvc_avg_10_64x4_avx2: 42.6 vvc_avg_10_64x8_c: 12778.1 vvc_avg_10_64x8_avx2: 67.8 vvc_avg_10_64x16_c: 22304.3 vvc_avg_10_64x16_avx2: 116.8 vvc_avg_10_64x32_c: 30777.1 vvc_avg_10_64x32_avx2: 201.1 vvc_avg_10_64x64_c: 60169.1 vvc_avg_10_64x64_avx2: 1454.3 vvc_avg_10_64x128_c: 124392.8 vvc_avg_10_64x128_avx2: 3648.6 vvc_avg_10_128x2_c: 3650.1 vvc_avg_10_128x2_avx2: 41.1 vvc_avg_10_128x4_c: 22887.8 vvc_avg_10_128x4_avx2: 64.1 vvc_avg_10_128x8_c: 14622.6 vvc_avg_10_128x8_avx2: 111.6 vvc_avg_10_128x16_c: 62207.6 vvc_avg_10_128x16_avx2: 186.3 vvc_avg_10_128x32_c: 59761.3 vvc_avg_10_128x32_avx2: 374.6 vvc_avg_10_128x64_c: 117504.3 vvc_avg_10_128x64_avx2: 2684.6 vvc_avg_10_128x128_c: 236767.6 vvc_avg_10_128x128_avx2: 15278.1 vvc_avg_12_2x2_c: 78.6 vvc_avg_12_2x2_avx2: 26.1 vvc_avg_12_2x4_c: 254.1 vvc_avg_12_2x4_avx2: 30.6 vvc_avg_12_2x8_c: 261.8 vvc_avg_12_2x8_avx2: 39.1 vvc_avg_12_2x16_c: 527.6 vvc_avg_12_2x16_avx2: 57.3 vvc_avg_12_2x32_c: 1089.1 vvc_avg_12_2x32_avx2: 93.8 vvc_avg_12_2x64_c: 2337.6 vvc_avg_12_2x64_avx2: 707.1 vvc_avg_12_2x128_c: 4582.1 vvc_avg_12_2x128_avx2: 1414.6 vvc_avg_12_4x2_c: 129.6 vvc_avg_12_4x2_avx2: 26.8 vvc_avg_12_4x4_c: 427.3 vvc_avg_12_4x4_avx2: 30.6 vvc_avg_12_4x8_c: 529.6 vvc_avg_12_4x8_avx2: 36.6 vvc_avg_12_4x16_c: 1022.1 
vvc_avg_12_4x16_avx2: 57.3 vvc_avg_12_4x32_c: 1987.6 vvc_avg_12_4x32_avx2: 84.3 vvc_avg_12_4x64_c: 4147.6 vvc_avg_12_4x64_avx2: 706.3 vvc_avg_12_4x128_c: 8469.3 vvc_avg_12_4x128_avx2: 1448.3 vvc_avg_12_8x2_c: 253.6 vvc_avg_12_8x2_avx2: 27.6 vvc_avg_12_8x4_c: 836.3 vvc_avg_12_8x4_avx2: 32.1 vvc_avg_12_8x8_c: 1074.6 vvc_avg_12_8x8_avx2: 45.1 vvc_avg_12_8x16_c: 3616.8 vvc_avg_12_8x16_avx2: 71.6 vvc_avg_12_8x32_c: 3823.6 vvc_avg_12_8x32_avx2: 140.1 vvc_avg_12_8x64_c: 7764.8 vvc_avg_12_8x64_avx2: 656.1 vvc_avg_12_8x128_c: 15896.1 vvc_avg_12_8x128_avx2: 1232.8 vvc_avg_12_16x2_c: 462.1 vvc_avg_12_16x2_avx2: 26.8 vvc_avg_12_16x4_c: 1732.1 vvc_avg_12_16x4_avx2: 29.1 vvc_avg_12_16x8_c: 2097.6 vvc_avg_12_16x8_avx2: 62.6 vvc_avg_12_16x16_c: 6753.1 vvc_avg_12_16x16_avx2: 47.8 vvc_avg_12_16x32_c: 7373.1 vvc_avg_12_16x32_avx2: 80.8 vvc_avg_12_16x64_c: 15046.3 vvc_avg_12_16x64_avx2: 621.1 vvc_avg_12_16x128_c: 52574.6 vvc_avg_12_16x128_avx2: 1417.1 vvc_avg_12_32x2_c: 1712.1 vvc_avg_12_32x2_avx2: 29.8 vvc_avg_12_32x4_c: 2036.8 vvc_avg_12_32x4_avx2: 37.6 vvc_avg_12_32x8_c: 4017.6 vvc_avg_12_32x8_avx2: 44.1 vvc_avg_12_32x16_c: 8018.6 vvc_avg_12_32x16_avx2: 70.8 vvc_avg_12_32x32_c: 15637.6 vvc_avg_12_32x32_avx2: 124.3 vvc_avg_12_32x64_c: 31143.3 vvc_avg_12_32x64_avx2: 830.3 vvc_avg_12_32x128_c: 75706.8 vvc_avg_12_32x128_avx2: 1604.8 vvc_avg_12_64x2_c: 3230.3 vvc_avg_12_64x2_avx2: 33.6 vvc_avg_12_64x4_c: 4139.6 vvc_avg_12_64x4_avx2: 45.1 vvc_avg_12_64x8_c: 8201.6 vvc_avg_12_64x8_avx2: 67.1 vvc_avg_12_64x16_c: 25632.3 vvc_avg_12_64x16_avx2: 110.3 vvc_avg_12_64x32_c: 30744.3 vvc_avg_12_64x32_avx2: 200.3 vvc_avg_12_64x64_c: 105554.8 vvc_avg_12_64x64_avx2: 1325.6 vvc_avg_12_64x128_c: 235254.3 vvc_avg_12_64x128_avx2: 3132.6 vvc_avg_12_128x2_c: 6194.3 vvc_avg_12_128x2_avx2: 55.1 vvc_avg_12_128x4_c: 7583.8 vvc_avg_12_128x4_avx2: 79.3 vvc_avg_12_128x8_c: 14635.6 vvc_avg_12_128x8_avx2: 104.3 vvc_avg_12_128x16_c: 29270.8 vvc_avg_12_128x16_avx2: 194.3 vvc_avg_12_128x32_c: 60113.6 
vvc_avg_12_128x32_avx2: 346.3 vvc_avg_12_128x64_c: 197030.3 vvc_avg_12_128x64_avx2: 2779.6 vvc_avg_12_128x128_c: 432809.6 vvc_avg_12_128x128_avx2: 5513.3 vvc_w_avg_8_2x2_c: 84.3 vvc_w_avg_8_2x2_avx2: 42.6 vvc_w_avg_8_2x4_c: 156.3 vvc_w_avg_8_2x4_avx2: 58.8 vvc_w_avg_8_2x8_c: 310.6 vvc_w_avg_8_2x8_avx2: 73.1 vvc_w_avg_8_2x16_c: 942.1 vvc_w_avg_8_2x16_avx2: 113.3 vvc_w_avg_8_2x32_c: 1098.8 vvc_w_avg_8_2x32_avx2: 202.6 vvc_w_avg_8_2x64_c: 2414.3 vvc_w_avg_8_2x64_avx2: 467.6 vvc_w_avg_8_2x128_c: 4763.8 vvc_w_avg_8_2x128_avx2: 1333.1 vvc_w_avg_8_4x2_c: 140.1 vvc_w_avg_8_4x2_avx2: 49.8 vvc_w_avg_8_4x4_c: 276.3 vvc_w_avg_8_4x4_avx2: 58.1 vvc_w_avg_8_4x8_c: 524.3 vvc_w_avg_8_4x8_avx2: 72.3 vvc_w_avg_8_4x16_c: 1108.1 vvc_w_avg_8_4x16_avx2: 111.8 vvc_w_avg_8_4x32_c: 2149.8 vvc_w_avg_8_4x32_avx2: 199.6 vvc_w_avg_8_4x64_c: 12288.1 vvc_w_avg_8_4x64_avx2: 509.3 vvc_w_avg_8_4x128_c: 8398.6 vvc_w_avg_8_4x128_avx2: 1319.6 vvc_w_avg_8_8x2_c: 271.1 vvc_w_avg_8_8x2_avx2: 44.1 vvc_w_avg_8_8x4_c: 503.3 vvc_w_avg_8_8x4_avx2: 61.8 vvc_w_avg_8_8x8_c: 1031.1 vvc_w_avg_8_8x8_avx2: 93.8 vvc_w_avg_8_8x16_c: 2009.8 vvc_w_avg_8_8x16_avx2: 163.1 vvc_w_avg_8_8x32_c: 4161.3 vvc_w_avg_8_8x32_avx2: 292.1 vvc_w_avg_8_8x64_c: 7940.6 vvc_w_avg_8_8x64_avx2: 592.1 vvc_w_avg_8_8x128_c: 16802.3 vvc_w_avg_8_8x128_avx2: 1287.6 vvc_w_avg_8_16x2_c: 762.6 vvc_w_avg_8_16x2_avx2: 53.6 vvc_w_avg_8_16x4_c: 1486.3 vvc_w_avg_8_16x4_avx2: 67.1 vvc_w_avg_8_16x8_c: 1907.8 vvc_w_avg_8_16x8_avx2: 96.8 vvc_w_avg_8_16x16_c: 3883.6 vvc_w_avg_8_16x16_avx2: 151.3 vvc_w_avg_8_16x32_c: 7974.8 vvc_w_avg_8_16x32_avx2: 285.8 vvc_w_avg_8_16x64_c: 25160.6 vvc_w_avg_8_16x64_avx2: 589.8 vvc_w_avg_8_16x128_c: 58328.1 vvc_w_avg_8_16x128_avx2: 1169.8 vvc_w_avg_8_32x2_c: 1009.1 vvc_w_avg_8_32x2_avx2: 65.6 vvc_w_avg_8_32x4_c: 2091.1 vvc_w_avg_8_32x4_avx2: 96.8 vvc_w_avg_8_32x8_c: 3997.8 vvc_w_avg_8_32x8_avx2: 156.3 vvc_w_avg_8_32x16_c: 8216.8 vvc_w_avg_8_32x16_avx2: 269.6 vvc_w_avg_8_32x32_c: 21746.1 vvc_w_avg_8_32x32_avx2: 635.3 
vvc_w_avg_8_32x64_c: 31564.8 vvc_w_avg_8_32x64_avx2: 1010.6 vvc_w_avg_8_32x128_c: 114373.3 vvc_w_avg_8_32x128_avx2: 2013.6 vvc_w_avg_8_64x2_c: 2067.3 vvc_w_avg_8_64x2_avx2: 97.6 vvc_w_avg_8_64x4_c: 3901.1 vvc_w_avg_8_64x4_avx2: 154.8 vvc_w_avg_8_64x8_c: 7911.6 vvc_w_avg_8_64x8_avx2: 268.8 vvc_w_avg_8_64x16_c: 16508.8 vvc_w_avg_8_64x16_avx2: 501.8 vvc_w_avg_8_64x32_c: 38770.3 vvc_w_avg_8_64x32_avx2: 1287.6 vvc_w_avg_8_64x64_c: 110350.6 vvc_w_avg_8_64x64_avx2: 1890.8 vvc_w_avg_8_64x128_c: 141354.6 vvc_w_avg_8_64x128_avx2: 3839.6 vvc_w_avg_8_128x2_c: 7012.1 vvc_w_avg_8_128x2_avx2: 159.3 vvc_w_avg_8_128x4_c: 8146.8 vvc_w_avg_8_128x4_avx2: 272.6 vvc_w_avg_8_128x8_c: 24596.8 vvc_w_avg_8_128x8_avx2: 501.1 vvc_w_avg_8_128x16_c: 35918.1 vvc_w_avg_8_128x16_avx2: 948.8 vvc_w_avg_8_128x32_c: 68799.6 vvc_w_avg_8_128x32_avx2: 1963.1 vvc_w_avg_8_128x64_c: 133862.1 vvc_w_avg_8_128x64_avx2: 3833.6 vvc_w_avg_8_128x128_c: 348427.8 vvc_w_avg_8_128x128_avx2: 7682.8 vvc_w_avg_10_2x2_c: 118.6 vvc_w_avg_10_2x2_avx2: 73.1 vvc_w_avg_10_2x4_c: 189.1 vvc_w_avg_10_2x4_avx2: 89.3 vvc_w_avg_10_2x8_c: 382.8 vvc_w_avg_10_2x8_avx2: 179.8 vvc_w_avg_10_2x16_c: 658.3 vvc_w_avg_10_2x16_avx2: 185.1 vvc_w_avg_10_2x32_c: 1409.3 vvc_w_avg_10_2x32_avx2: 290.8 vvc_w_avg_10_2x64_c: 2906.8 vvc_w_avg_10_2x64_avx2: 793.1 vvc_w_avg_10_2x128_c: 6292.6 vvc_w_avg_10_2x128_avx2: 1696.8 vvc_w_avg_10_4x2_c: 178.8 vvc_w_avg_10_4x2_avx2: 80.1 vvc_w_avg_10_4x4_c: 581.6 vvc_w_avg_10_4x4_avx2: 97.6 vvc_w_avg_10_4x8_c: 693.3 vvc_w_avg_10_4x8_avx2: 128.1 vvc_w_avg_10_4x16_c: 1436.6 vvc_w_avg_10_4x16_avx2: 179.8 vvc_w_avg_10_4x32_c: 2409.1 vvc_w_avg_10_4x32_avx2: 292.3 vvc_w_avg_10_4x64_c: 4925.3 vvc_w_avg_10_4x64_avx2: 746.1 vvc_w_avg_10_4x128_c: 10664.6 vvc_w_avg_10_4x128_avx2: 1647.6 vvc_w_avg_10_8x2_c: 359.3 vvc_w_avg_10_8x2_avx2: 80.1 vvc_w_avg_10_8x4_c: 925.6 vvc_w_avg_10_8x4_avx2: 97.6 vvc_w_avg_10_8x8_c: 1360.6 vvc_w_avg_10_8x8_avx2: 121.8 vvc_w_avg_10_8x16_c: 3490.3 vvc_w_avg_10_8x16_avx2: 203.3 vvc_w_avg_10_8x32_c: 
5266.1 vvc_w_avg_10_8x32_avx2: 325.8 vvc_w_avg_10_8x64_c: 11127.1 vvc_w_avg_10_8x64_avx2: 747.8 vvc_w_avg_10_8x128_c: 31058.3 vvc_w_avg_10_8x128_avx2: 1424.6 vvc_w_avg_10_16x2_c: 624.8 vvc_w_avg_10_16x2_avx2: 84.6 vvc_w_avg_10_16x4_c: 1389.6 vvc_w_avg_10_16x4_avx2: 109.1 vvc_w_avg_10_16x8_c: 2688.3 vvc_w_avg_10_16x8_avx2: 137.1 vvc_w_avg_10_16x16_c: 5387.1 vvc_w_avg_10_16x16_avx2: 224.6 vvc_w_avg_10_16x32_c: 10776.3 vvc_w_avg_10_16x32_avx2: 312.1 vvc_w_avg_10_16x64_c: 18069.1 vvc_w_avg_10_16x64_avx2: 858.6 vvc_w_avg_10_16x128_c: 43460.3 vvc_w_avg_10_16x128_avx2: 1411.6 vvc_w_avg_10_32x2_c: 1232.8 vvc_w_avg_10_32x2_avx2: 99.1 vvc_w_avg_10_32x4_c: 4017.6 vvc_w_avg_10_32x4_avx2: 134.1 vvc_w_avg_10_32x8_c: 9306.3 vvc_w_avg_10_32x8_avx2: 208.1 vvc_w_avg_10_32x16_c: 8424.6 vvc_w_avg_10_32x16_avx2: 349.3 vvc_w_avg_10_32x32_c: 20787.8 vvc_w_avg_10_32x32_avx2: 655.3 vvc_w_avg_10_32x64_c: 40972.1 vvc_w_avg_10_32x64_avx2: 904.8 vvc_w_avg_10_32x128_c: 85670.3 vvc_w_avg_10_32x128_avx2: 1751.6 vvc_w_avg_10_64x2_c: 2454.1 vvc_w_avg_10_64x2_avx2: 132.6 vvc_w_avg_10_64x4_c: 5012.6 vvc_w_avg_10_64x4_avx2: 215.6 vvc_w_avg_10_64x8_c: 10811.3 vvc_w_avg_10_64x8_avx2: 361.1 vvc_w_avg_10_64x16_c: 33349.1 vvc_w_avg_10_64x16_avx2: 904.1 vvc_w_avg_10_64x32_c: 41892.3 vvc_w_avg_10_64x32_avx2: 1220.6 vvc_w_avg_10_64x64_c: 66983.3 vvc_w_avg_10_64x64_avx2: 2622.1 vvc_w_avg_10_64x128_c: 246508.8 vvc_w_avg_10_64x128_avx2: 3316.8 vvc_w_avg_10_128x2_c: 7791.6 vvc_w_avg_10_128x2_avx2: 198.8 vvc_w_avg_10_128x4_c: 10534.3 vvc_w_avg_10_128x4_avx2: 337.3 vvc_w_avg_10_128x8_c: 21142.3 vvc_w_avg_10_128x8_avx2: 614.8 vvc_w_avg_10_128x16_c: 40968.6 vvc_w_avg_10_128x16_avx2: 1160.6 vvc_w_avg_10_128x32_c: 113043.3 vvc_w_avg_10_128x32_avx2: 1644.6 vvc_w_avg_10_128x64_c: 230658.3 vvc_w_avg_10_128x64_avx2: 5065.3 vvc_w_avg_10_128x128_c: 335236.3 vvc_w_avg_10_128x128_avx2: 6450.3 vvc_w_avg_12_2x2_c: 185.3 vvc_w_avg_12_2x2_avx2: 43.6 vvc_w_avg_12_2x4_c: 340.3 vvc_w_avg_12_2x4_avx2: 55.8 vvc_w_avg_12_2x8_c: 632.3 
vvc_w_avg_12_2x8_avx2: 70.1 vvc_w_avg_12_2x16_c: 728.3 vvc_w_avg_12_2x16_avx2: 108.1 vvc_w_avg_12_2x32_c: 1392.6 vvc_w_avg_12_2x32_avx2: 176.8 vvc_w_avg_12_2x64_c: 2618.3 vvc_w_avg_12_2x64_avx2: 757.3 vvc_w_avg_12_2x128_c: 6408.8 vvc_w_avg_12_2x128_avx2: 1435.1 vvc_w_avg_12_4x2_c: 349.3 vvc_w_avg_12_4x2_avx2: 44.3 vvc_w_avg_12_4x4_c: 607.1 vvc_w_avg_12_4x4_avx2: 52.6 vvc_w_avg_12_4x8_c: 1134.8 vvc_w_avg_12_4x8_avx2: 70.1 vvc_w_avg_12_4x16_c: 1378.1 vvc_w_avg_12_4x16_avx2: 115.3 vvc_w_avg_12_4x32_c: 2599.3 vvc_w_avg_12_4x32_avx2: 174.3 vvc_w_avg_12_4x64_c: 4474.8 vvc_w_avg_12_4x64_avx2: 656.1 vvc_w_avg_12_4x128_c: 11319.6 vvc_w_avg_12_4x128_avx2: 1373.1 vvc_w_avg_12_8x2_c: 595.8 vvc_w_avg_12_8x2_avx2: 44.3 vvc_w_avg_12_8x4_c: 1164.3 vvc_w_avg_12_8x4_avx2: 56.6 vvc_w_avg_12_8x8_c: 2019.6 vvc_w_avg_12_8x8_avx2: 80.1 vvc_w_avg_12_8x16_c: 4071.6 vvc_w_avg_12_8x16_avx2: 139.3 vvc_w_avg_12_8x32_c: 4485.1 vvc_w_avg_12_8x32_avx2: 250.6 vvc_w_avg_12_8x64_c: 8404.8 vvc_w_avg_12_8x64_avx2: 735.8 vvc_w_avg_12_8x128_c: 35679.8 vvc_w_avg_12_8x128_avx2: 1252.6 vvc_w_avg_12_16x2_c: 1114.8 vvc_w_avg_12_16x2_avx2: 46.6 vvc_w_avg_12_16x4_c: 2240.1 vvc_w_avg_12_16x4_avx2: 62.6 vvc_w_avg_12_16x8_c: 13174.6 vvc_w_avg_12_16x8_avx2: 88.6 vvc_w_avg_12_16x16_c: 5334.6 vvc_w_avg_12_16x16_avx2: 144.3 vvc_w_avg_12_16x32_c: 8378.1 vvc_w_avg_12_16x32_avx2: 234.6 vvc_w_avg_12_16x64_c: 21300.8 vvc_w_avg_12_16x64_avx2: 761.8 vvc_w_avg_12_16x128_c: 32786.8 vvc_w_avg_12_16x128_avx2: 1432.8 vvc_w_avg_12_32x2_c: 2154.3 vvc_w_avg_12_32x2_avx2: 61.1 vvc_w_avg_12_32x4_c: 4299.8 vvc_w_avg_12_32x4_avx2: 83.1 vvc_w_avg_12_32x8_c: 7964.8 vvc_w_avg_12_32x8_avx2: 132.6 vvc_w_avg_12_32x16_c: 13321.6 vvc_w_avg_12_32x16_avx2: 234.6 vvc_w_avg_12_32x32_c: 21149.3 vvc_w_avg_12_32x32_avx2: 433.3 vvc_w_avg_12_32x64_c: 43666.6 vvc_w_avg_12_32x64_avx2: 876.6 vvc_w_avg_12_32x128_c: 83189.8 vvc_w_avg_12_32x128_avx2: 1756.6 vvc_w_avg_12_64x2_c: 3829.8 vvc_w_avg_12_64x2_avx2: 83.1 vvc_w_avg_12_64x4_c: 8588.1 
vvc_w_avg_12_64x4_avx2: 127.1 vvc_w_avg_12_64x8_c: 17027.6 vvc_w_avg_12_64x8_avx2: 310.6 vvc_w_avg_12_64x16_c: 29797.8 vvc_w_avg_12_64x16_avx2: 415.6 vvc_w_avg_12_64x32_c: 43854.3 vvc_w_avg_12_64x32_avx2: 773.3 vvc_w_avg_12_64x64_c: 137767.3 vvc_w_avg_12_64x64_avx2: 1608.6 vvc_w_avg_12_64x128_c: 316428.3 vvc_w_avg_12_64x128_avx2: 3249.8 vvc_w_avg_12_128x2_c: 8824.6 vvc_w_avg_12_128x2_avx2: 130.3 vvc_w_avg_12_128x4_c: 17173.6 vvc_w_avg_12_128x4_avx2: 219.3 vvc_w_avg_12_128x8_c: 21997.8 vvc_w_avg_12_128x8_avx2: 397.3 vvc_w_avg_12_128x16_c: 43553.8 vvc_w_avg_12_128x16_avx2: 790.1 vvc_w_avg_12_128x32_c: 89792.1 vvc_w_avg_12_128x32_avx2: 1497.6 vvc_w_avg_12_128x64_c: 226573.3 vvc_w_avg_12_128x64_avx2: 3153.1 vvc_w_avg_12_128x128_c: 332090.1 vvc_w_avg_12_128x128_avx2: 6499.6 Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
2024-01-23 18:17:10 +00:00
}
}
#endif
}