diff --git a/libavfilter/gradfun.h b/libavfilter/gradfun.h index 6b192a3113..5d011309c6 100644 --- a/libavfilter/gradfun.h +++ b/libavfilter/gradfun.h @@ -37,12 +37,9 @@ typedef struct { void (*blur_line) (uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); } GradFunContext; +void ff_gradfun_init_x86(GradFunContext *gf); + void ff_gradfun_filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers); - -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width); - #endif /* AVFILTER_GRADFUN_H */ diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c index 12977d92b7..71749fe7a9 100644 --- a/libavfilter/vf_gradfun.c +++ b/libavfilter/vf_gradfun.c @@ -123,7 +123,6 @@ static av_cold int init(AVFilterContext *ctx, const char *args) GradFunContext *gf = ctx->priv; float thresh = 1.2; int radius = 16; - int cpu_flags = av_get_cpu_flags(); if (args) sscanf(args, "%f:%d", &thresh, &radius); @@ -135,12 +134,8 @@ static av_cold int init(AVFilterContext *ctx, const char *args) gf->blur_line = ff_gradfun_blur_line_c; gf->filter_line = ff_gradfun_filter_line_c; - if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2) - gf->filter_line = ff_gradfun_filter_line_mmx2; - if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3) - gf->filter_line = ff_gradfun_filter_line_ssse3; - if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) - gf->blur_line = ff_gradfun_blur_line_sse2; + if (HAVE_MMX) + ff_gradfun_init_x86(gf); av_log(ctx, AV_LOG_VERBOSE, "threshold:%.2f radius:%d\n", thresh, gf->radius); diff 
--git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index ff3b19d38d..1d6a7ab363 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/x86_cpu.h" #include "libavfilter/gradfun.h" @@ -25,9 +26,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F}; DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; -void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +#if HAVE_MMX2 +static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) { -#if HAVE_MMX intptr_t x; if (width & 3) { x = width & ~3; @@ -70,12 +71,12 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int w "rm"(thresh), "m"(*dithers), "m"(*pw_7f) :"memory" ); -#endif } +#endif -void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) -{ #if HAVE_SSSE3 +static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +{ intptr_t x; if (width & 7) { // could be 10% faster if I somehow eliminated this @@ -117,12 +118,12 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, int "rm"(thresh), "m"(*dithers), "m"(*pw_7f) :"memory" ); -#endif // HAVE_SSSE3 } +#endif // HAVE_SSSE3 -void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) -{ #if HAVE_SSE +static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint8_t *src, int src_linesize, int width) +{ #define BLURV(load)\ intptr_t x = -2*width;\ __asm__ volatile(\ @@ -160,5 
+161,36 @@ void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, uint
     } else {
         BLURV("movdqa");
     }
-#endif // HAVE_SSE
+}
+#endif // HAVE_SSE
+
+/**
+ * Install the x86 SIMD line functions that are both compiled in and
+ * supported by the CPU flags reported by av_get_cpu_flags().
+ *
+ * Function pointers in gf without a matching SIMD routine are left untouched.
+ *
+ * @param gf filter context whose filter_line / blur_line pointers are updated
+ */
+av_cold void ff_gradfun_init_x86(GradFunContext *gf)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    /* Each SIMD routine is a static function defined only when its HAVE_*
+     * macro is non-zero, so every reference has to be removed by the
+     * preprocessor: a plain if (HAVE_* && ...) would still name an
+     * undeclared identifier and break the build when the macro is 0. */
+#if HAVE_MMX2
+    if (cpu_flags & AV_CPU_FLAG_MMX2)
+        gf->filter_line = gradfun_filter_line_mmx2;
+#endif
+#if HAVE_SSSE3
+    /* Assigned after MMX2 so the SSSE3 version wins when both are usable. */
+    if (cpu_flags & AV_CPU_FLAG_SSSE3)
+        gf->filter_line = gradfun_filter_line_ssse3;
+#endif
+#if HAVE_SSE
+    if (cpu_flags & AV_CPU_FLAG_SSE2)
+        gf->blur_line = gradfun_blur_line_sse2;
+#endif
 }