mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-02-07 23:32:33 +00:00
parent
ea5b375e0e
commit
1e202d89c9
@ -23,6 +23,10 @@
|
|||||||
|
|
||||||
%include "libavutil/x86/x86util.asm"
|
%include "libavutil/x86/x86util.asm"
|
||||||
|
|
||||||
|
SECTION_RODATA
|
||||||
|
|
||||||
|
vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13,
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
%macro PMACSDQL 5
|
%macro PMACSDQL 5
|
||||||
@ -89,6 +93,9 @@ LPC_32 sse4
|
|||||||
;----------------------------------------------------------------------------------
|
;----------------------------------------------------------------------------------
|
||||||
%macro FLAC_DECORRELATE_16 3-4
|
%macro FLAC_DECORRELATE_16 3-4
|
||||||
cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
|
cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
|
||||||
|
%ifidn %1, indep2
|
||||||
|
VBROADCASTI128 m2, [vector]
|
||||||
|
%endif
|
||||||
%if ARCH_X86_32
|
%if ARCH_X86_32
|
||||||
mov lend, lenm
|
mov lend, lenm
|
||||||
%endif
|
%endif
|
||||||
@ -112,11 +119,17 @@ align 16
|
|||||||
%endif
|
%endif
|
||||||
%ifnidn %1, indep2
|
%ifnidn %1, indep2
|
||||||
p%4d m2, m0, m1
|
p%4d m2, m0, m1
|
||||||
|
packssdw m%2, m%2
|
||||||
|
packssdw m%3, m%3
|
||||||
|
punpcklwd m%2, m%3
|
||||||
|
psllw m%2, m3
|
||||||
|
%else
|
||||||
|
pslld m%2, m3
|
||||||
|
pslld m%3, m3
|
||||||
|
pshufb m%2, m%2, m2
|
||||||
|
pshufb m%3, m%3, m2
|
||||||
|
punpcklwd m%2, m%3
|
||||||
%endif
|
%endif
|
||||||
packssdw m%2, m%2
|
|
||||||
packssdw m%3, m%3
|
|
||||||
punpcklwd m%2, m%3
|
|
||||||
psllw m%2, m3
|
|
||||||
mova [outq + lenq], m%2
|
mova [outq + lenq], m%2
|
||||||
add lenq, 16
|
add lenq, 16
|
||||||
jl .loop
|
jl .loop
|
||||||
@ -292,7 +305,7 @@ align 16
|
|||||||
REP_RET
|
REP_RET
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
INIT_XMM sse2
|
INIT_XMM ssse3
|
||||||
FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
|
FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
|
||||||
FLAC_DECORRELATE_INDEP 32, 2, 3, d
|
FLAC_DECORRELATE_INDEP 32, 2, 3, d
|
||||||
FLAC_DECORRELATE_INDEP 16, 4, 3, w
|
FLAC_DECORRELATE_INDEP 16, 4, 3, w
|
||||||
|
@ -34,7 +34,9 @@ void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int chann
|
|||||||
void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
||||||
int len, int shift); \
|
int len, int shift); \
|
||||||
void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
||||||
int len, int shift); \
|
int len, int shift);
|
||||||
|
|
||||||
|
#define DECORRELATE_IFUNCS(fmt, opt) \
|
||||||
void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
||||||
int len, int shift); \
|
int len, int shift); \
|
||||||
void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
|
||||||
@ -48,6 +50,10 @@ DECORRELATE_FUNCS(16, sse2);
|
|||||||
DECORRELATE_FUNCS(16, avx);
|
DECORRELATE_FUNCS(16, avx);
|
||||||
DECORRELATE_FUNCS(32, sse2);
|
DECORRELATE_FUNCS(32, sse2);
|
||||||
DECORRELATE_FUNCS(32, avx);
|
DECORRELATE_FUNCS(32, avx);
|
||||||
|
DECORRELATE_IFUNCS(16, ssse3);
|
||||||
|
DECORRELATE_IFUNCS(16, avx);
|
||||||
|
DECORRELATE_IFUNCS(32, ssse3);
|
||||||
|
DECORRELATE_IFUNCS(32, avx);
|
||||||
|
|
||||||
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
|
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
|
||||||
{
|
{
|
||||||
@ -56,31 +62,36 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
|
|||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||||
if (fmt == AV_SAMPLE_FMT_S16) {
|
if (fmt == AV_SAMPLE_FMT_S16) {
|
||||||
if (channels == 2)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2;
|
|
||||||
else if (channels == 4)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2;
|
|
||||||
else if (channels == 6)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2;
|
|
||||||
else if (ARCH_X86_64 && channels == 8)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2;
|
|
||||||
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
|
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
|
||||||
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
|
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
|
||||||
c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
|
c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
|
||||||
} else if (fmt == AV_SAMPLE_FMT_S32) {
|
} else if (fmt == AV_SAMPLE_FMT_S32) {
|
||||||
if (channels == 2)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2;
|
|
||||||
else if (channels == 4)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2;
|
|
||||||
else if (channels == 6)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2;
|
|
||||||
else if (ARCH_X86_64 && channels == 8)
|
|
||||||
c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2;
|
|
||||||
c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
|
c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
|
||||||
c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
|
c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
|
||||||
c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
|
c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_SSSE3(cpu_flags)) {
|
||||||
|
if (fmt == AV_SAMPLE_FMT_S16) {
|
||||||
|
if (channels == 2)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3;
|
||||||
|
else if (channels == 4)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3;
|
||||||
|
else if (channels == 6)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3;
|
||||||
|
else if (ARCH_X86_64 && channels == 8)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3;
|
||||||
|
} else if (fmt == AV_SAMPLE_FMT_S32) {
|
||||||
|
if (channels == 2)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3;
|
||||||
|
else if (channels == 4)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3;
|
||||||
|
else if (channels == 6)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3;
|
||||||
|
else if (ARCH_X86_64 && channels == 8)
|
||||||
|
c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (EXTERNAL_SSE4(cpu_flags)) {
|
if (EXTERNAL_SSE4(cpu_flags)) {
|
||||||
c->lpc32 = ff_flac_lpc_32_sse4;
|
c->lpc32 = ff_flac_lpc_32_sse4;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user