ffmpeg/libavcodec/x86/takdsp_init.c
James Almer 591dc3b4b8 x86/takdsp: add avx2 versions of all functions
On an Intel Core i7 12700k:

decorrelate_ls_c: 814.3
decorrelate_ls_sse2: 165.8
decorrelate_ls_avx2: 101.3
decorrelate_sf_c: 1602.6
decorrelate_sf_sse4: 640.1
decorrelate_sf_avx2: 324.6
decorrelate_sm_c: 1564.8
decorrelate_sm_sse2: 379.3
decorrelate_sm_avx2: 203.3
decorrelate_sr_c: 785.3
decorrelate_sr_sse2: 176.3
decorrelate_sr_avx2: 99.8

Tested-by: Lynne <dev@lynne.ee>
Signed-off-by: James Almer <jamrial@gmail.com>
2023-12-23 08:39:22 -03:00

58 lines
2.2 KiB
C

/*
* Copyright (c) 2015 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavcodec/takdsp.h"
#include "libavutil/x86/cpu.h"
#include "config.h"
void ff_tak_decorrelate_ls_sse2(const int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_ls_avx2(const int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sr_sse2(int32_t *p1, const int32_t *p2, int length);
void ff_tak_decorrelate_sr_avx2(int32_t *p1, const int32_t *p2, int length);
void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sm_avx2(int32_t *p1, int32_t *p2, int length);
void ff_tak_decorrelate_sf_sse4(int32_t *p1, const int32_t *p2, int length, int dshift, int dfactor);
void ff_tak_decorrelate_sf_avx2(int32_t *p1, const int32_t *p2, int length, int dshift, int dfactor);
av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
{
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
}
if (EXTERNAL_SSE4(cpu_flags)) {
c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
c->decorrelate_ls = ff_tak_decorrelate_ls_avx2;
c->decorrelate_sr = ff_tak_decorrelate_sr_avx2;
c->decorrelate_sm = ff_tak_decorrelate_sm_avx2;
c->decorrelate_sf = ff_tak_decorrelate_sf_avx2;
}
#endif
}