diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm index 79776ac086..f81e2d5033 100644 --- a/libavcodec/x86/hevc_sao_10bit.asm +++ b/libavcodec/x86/hevc_sao_10bit.asm @@ -252,7 +252,7 @@ cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_st %endif ; ARCH -%if cpuflag(avx2) +%if mmsize > 16 SPLATW m8, [offsetq+2] SPLATW m9, [offsetq+4] SPLATW m10, [offsetq+0] @@ -352,11 +352,18 @@ HEVC_SAO_EDGE_FILTER 12, 48, 6 HEVC_SAO_EDGE_FILTER 12, 64, 8 %if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +HEVC_SAO_EDGE_FILTER 10, 8, 1 INIT_YMM avx2 +HEVC_SAO_EDGE_FILTER 10, 16, 1 HEVC_SAO_EDGE_FILTER 10, 32, 2 HEVC_SAO_EDGE_FILTER 10, 48, 3 HEVC_SAO_EDGE_FILTER 10, 64, 4 +INIT_XMM avx2 +HEVC_SAO_EDGE_FILTER 12, 8, 1 +INIT_YMM avx2 +HEVC_SAO_EDGE_FILTER 12, 16, 1 HEVC_SAO_EDGE_FILTER 12, 32, 2 HEVC_SAO_EDGE_FILTER 12, 48, 3 HEVC_SAO_EDGE_FILTER 12, 64, 4 diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 2181f6daf2..0de01637ad 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -1045,9 +1045,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; } SAO_BAND_INIT(10, avx2); - c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2; - c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2; - c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2; + SAO_EDGE_INIT(10, avx2); c->transform_add[2] = ff_hevc_transform_add16_10_avx2; c->transform_add[3] = ff_hevc_transform_add32_10_avx2; @@ -1101,9 +1099,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; SAO_BAND_INIT(12, avx2); - c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_12_avx2; - c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_12_avx2; - c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_12_avx2; + SAO_EDGE_INIT(12, avx2); } } }