mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code
This is simpler and more robust, and fixes mismatched XMM save/restore. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
d41b66a1a2
commit
f1214763af
|
@ -26,12 +26,6 @@ SECTION_TEXT
|
||||||
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
|
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
|
||||||
; int order, int mul)
|
; int order, int mul)
|
||||||
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
|
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
|
||||||
%if mmsize == 16
|
|
||||||
test orderq, 8
|
|
||||||
jnz scalarproduct_and_madd_int16_fallback
|
|
||||||
%else
|
|
||||||
scalarproduct_and_madd_int16_fallback
|
|
||||||
%endif
|
|
||||||
shl orderq, 1
|
shl orderq, 1
|
||||||
movd m7, mulm
|
movd m7, mulm
|
||||||
%if mmsize == 16
|
%if mmsize == 16
|
||||||
|
@ -123,8 +117,6 @@ align 16
|
||||||
; int order, int mul)
|
; int order, int mul)
|
||||||
INIT_XMM ssse3
|
INIT_XMM ssse3
|
||||||
cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
|
cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
|
||||||
test orderq, 8
|
|
||||||
jnz scalarproduct_and_madd_int16_fallback
|
|
||||||
shl orderq, 1
|
shl orderq, 1
|
||||||
movd m7, mulm
|
movd m7, mulm
|
||||||
pshuflw m7, m7, 0
|
pshuflw m7, m7, 0
|
||||||
|
|
|
@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
|
||||||
const int16_t *v3,
|
const int16_t *v3,
|
||||||
int order, int mul);
|
int order, int mul);
|
||||||
|
|
||||||
|
/**
 * C-side dispatcher in front of the SSE2 scalarproduct_and_madd_int16 routine.
 *
 * When bit 3 of order is set (order & 8) the call is forwarded to the MMXEXT
 * implementation; otherwise it is forwarded to the SSE2 implementation.
 * All arguments are passed through unchanged.
 *
 * @param v1    first int16_t vector (non-const, handed to the assembly routine)
 * @param v2    second int16_t vector (read-only)
 * @param v3    third int16_t vector (read-only)
 * @param order length argument; only its bit 3 is inspected here
 * @param mul   multiplier argument, forwarded as-is
 * @return the value returned by the selected assembly routine, or 0 when
 *         HAVE_SSE2_EXTERNAL is 0 and no assembly routine can be called
 */
static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
                                                 const int16_t *v3,
                                                 int order, int mul)
{
#if HAVE_SSE2_EXTERNAL
    if (order & 8)
        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);

    return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul);
#else
    /* Without this branch the function would end without returning a value
     * whenever the external SSE2 assembly is compiled out. */
    return 0;
#endif
}
|
||||||
|
|
||||||
|
/**
 * C-side dispatcher in front of the SSSE3 scalarproduct_and_madd_int16 routine.
 *
 * When bit 3 of order is set (order & 8) the call is forwarded to the MMXEXT
 * implementation; otherwise it is forwarded to the SSSE3 implementation.
 * All arguments are passed through unchanged.
 *
 * @param v1    first int16_t vector (non-const, handed to the assembly routine)
 * @param v2    second int16_t vector (read-only)
 * @param v3    third int16_t vector (read-only)
 * @param order length argument; only its bit 3 is inspected here
 * @param mul   multiplier argument, forwarded as-is
 * @return the value returned by the selected assembly routine, or 0 when
 *         HAVE_SSSE3_EXTERNAL is 0 and no assembly routine can be called
 */
static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                                  const int16_t *v3,
                                                  int order, int mul)
{
#if HAVE_SSSE3_EXTERNAL
    if (order & 8)
        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);

    return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul);
#else
    /* Without this branch the function would end without returning a value
     * whenever the external SSSE3 assembly is compiled out. */
    return 0;
#endif
}
|
||||||
|
|
||||||
av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
|
av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
|
||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
@ -39,9 +63,9 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
|
||||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
|
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
|
||||||
|
|
||||||
if (EXTERNAL_SSE2(cpu_flags))
|
if (EXTERNAL_SSE2(cpu_flags))
|
||||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
|
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;
|
||||||
|
|
||||||
if (EXTERNAL_SSSE3(cpu_flags) &&
|
if (EXTERNAL_SSSE3(cpu_flags) &&
|
||||||
!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
|
!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
|
||||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
|
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue