diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm index 47022d3610..feea50c67b 100644 --- a/libavcodec/x86/hevc_res_add.asm +++ b/libavcodec/x86/hevc_res_add.asm @@ -156,8 +156,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6 %endmacro -INIT_XMM sse2 -; void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +%macro TRANSFORM_ADD_8 0 +; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) cglobal hevc_transform_add8_8, 3, 4, 8 lea r3, [r2*3] TR_ADD_SSE_8_8 @@ -167,7 +167,7 @@ cglobal hevc_transform_add8_8, 3, 4, 8 RET %if ARCH_X86_64 -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) cglobal hevc_transform_add16_8, 3, 4, 12 lea r3, [r2*3] TR_ADD_SSE_16_8 @@ -178,7 +178,7 @@ cglobal hevc_transform_add16_8, 3, 4, 12 %endrep RET -; void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) cglobal hevc_transform_add32_8, 3, 4, 12 TR_ADD_SSE_32_8 @@ -190,6 +190,13 @@ cglobal hevc_transform_add32_8, 3, 4, 12 RET %endif ;ARCH_X86_64 +%endmacro + +INIT_XMM sse2 +TRANSFORM_ADD_8 +INIT_XMM avx +TRANSFORM_ADD_8 + ;----------------------------------------------------------------------------- ; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride) ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index 7ced22cdb1..74b5173a31 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -139,6 +139,10 @@ void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stri void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); 
+void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); + void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 0f9fe7d5d5..f6f0a4bddd 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -509,7 +509,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (ARCH_X86_64) { c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; + + c->transform_add[2] = ff_hevc_transform_add16_8_avx; + c->transform_add[3] = ff_hevc_transform_add32_8_avx; } + c->transform_add[1] = ff_hevc_transform_add8_8_avx; } if (EXTERNAL_AVX2(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;