diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index c403298cab..afe3f13c68 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -557,8 +557,13 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
 # include "postprocess_template.c"
 # define TEMPLATE_PP_3DNOW 1
 # include "postprocess_template.c"
+# define TEMPLATE_PP_SSE2 1
+# include "postprocess_template.c"
 # else
-# if HAVE_MMXEXT_INLINE
+# if HAVE_SSE2_INLINE
+# define TEMPLATE_PP_SSE2 1
+# include "postprocess_template.c"
+# elif HAVE_MMXEXT_INLINE
 # define TEMPLATE_PP_MMXEXT 1
 # include "postprocess_template.c"
 # elif HAVE_AMD3DNOW_INLINE
@@ -586,14 +591,17 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
 #if CONFIG_RUNTIME_CPUDETECT
 #if ARCH_X86 && HAVE_INLINE_ASM
     // ordered per speed fastest first
-    if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
+    if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
+    else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
     else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
     else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
 #elif HAVE_ALTIVEC
     if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
 #endif
 #else /* CONFIG_RUNTIME_CPUDETECT */
-#if HAVE_MMXEXT_INLINE
+#if HAVE_SSE2_INLINE
+    pp = postProcess_SSE2;
+#elif HAVE_MMXEXT_INLINE
     pp = postProcess_MMX2;
 #elif HAVE_AMD3DNOW_INLINE
     pp = postProcess_3DNow;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index 22c6e5a741..dc63032792 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -64,6 +64,16 @@
 # define TEMPLATE_PP_3DNOW 0
 #endif
 
+#ifdef TEMPLATE_PP_SSE2
+# undef TEMPLATE_PP_MMX
+# define TEMPLATE_PP_MMX 1
+# undef TEMPLATE_PP_MMXEXT
+# define TEMPLATE_PP_MMXEXT 1
+# define RENAME(a) a ## _SSE2
+#else
+# define TEMPLATE_PP_SSE2 0
+#endif
+
 #undef REAL_PAVGB
 #undef PAVGB
 #undef PMINUB
@@ -3675,3 +3685,4 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 #undef TEMPLATE_PP_MMX
 #undef TEMPLATE_PP_MMXEXT
 #undef TEMPLATE_PP_3DNOW
+#undef TEMPLATE_PP_SSE2