diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index 78d4689ba0..05f7b250c6 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -474,6 +474,12 @@ static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, uint8_t *src, ff_avg_pixels8_mmx(dst, src, stride, 8); } +static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, + ptrdiff_t stride) +{ + ff_avg_pixels8_mmxext(dst, src, stride, 8); +} + static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, ptrdiff_t stride) { @@ -485,6 +491,24 @@ static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, uint8_t *src, { ff_avg_pixels16_mmx(dst, src, stride, 16); } + +static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, + ptrdiff_t stride) +{ + ff_avg_pixels16_mmxext(dst, src, stride, 16); +} + +static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, + ptrdiff_t stride) +{ + ff_put_pixels16_sse2(dst, src, stride, 16); +} + +static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, uint8_t *src, + ptrdiff_t stride) +{ + ff_avg_pixels16_sse2(dst, src, stride, 16); +} #endif static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c, @@ -517,15 +541,6 @@ CAVS_MC(put_, 8, mmxext) CAVS_MC(put_, 16, mmxext) CAVS_MC(avg_, 8, mmxext) CAVS_MC(avg_, 16, mmxext) - -static av_cold void cavsdsp_init_mmxext(CAVSDSPContext *c, - AVCodecContext *avctx) -{ - DSPFUNC(put, 0, 16, mmxext); - DSPFUNC(put, 1, 8, mmxext); - DSPFUNC(avg, 0, 16, mmxext); - DSPFUNC(avg, 1, 8, mmxext); -} #endif /* HAVE_MMXEXT_INLINE */ #if HAVE_AMD3DNOW_INLINE @@ -557,7 +572,23 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) cavsdsp_init_3dnow(c, avctx); #endif /* HAVE_AMD3DNOW_INLINE */ #if HAVE_MMXEXT_INLINE - if (INLINE_MMXEXT(cpu_flags)) - cavsdsp_init_mmxext(c, avctx); -#endif /* HAVE_MMXEXT_INLINE */ + if (INLINE_MMXEXT(cpu_flags)) { + DSPFUNC(put, 0, 16, mmxext); + DSPFUNC(put, 1, 8, mmxext); + DSPFUNC(avg, 0, 16, mmxext); + DSPFUNC(avg, 1, 8, mmxext); + } +#endif +#if HAVE_MMX_EXTERNAL + if (EXTERNAL_MMXEXT(cpu_flags)) { + c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext; + c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext; + } +#endif +#if HAVE_SSE2_EXTERNAL + if (EXTERNAL_SSE2(cpu_flags)) { + c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2; + c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2; + } +#endif } diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_mmx.c index e10feafe40..a12f9523d0 100644 --- a/libavcodec/x86/diracdsp_mmx.c +++ b/libavcodec/x86/diracdsp_mmx.c @@ -83,9 +83,6 @@ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[ }\ } -void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); - DIRAC_PIXOP(put, ff_put, mmx) DIRAC_PIXOP(avg, ff_avg, mmx) DIRAC_PIXOP(avg, ff_avg, mmxext) diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 5dd6c20e4f..30829ae5f5 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -115,9 +115,6 @@ void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, #define ff_put_pixels16_mmxext ff_put_pixels16_mmx #define ff_put_pixels8_mmxext ff_put_pixels8_mmx -void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); - #define QPEL_OP(OPNAME, RND, MMX) \ static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src, \ ptrdiff_t stride) \ diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h index 2fbbfce26f..4d93959a96 100644 --- a/libavcodec/x86/fpel.h +++ b/libavcodec/x86/fpel.h @@ -28,6 +28,8 @@ void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 1a06acab2e..85a0f6ce66 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -53,9 +53,6 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, #define ff_put_pixels8_mmxext ff_put_pixels8_mmx #define ff_put_pixels4_mmxext ff_put_pixels4_mmx -void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); - #define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src, int dstStride, int srcStride);\ diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h index bfa849c0cf..5fae990a4f 100644 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@ -27,12 +27,27 @@ void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); + void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); #endif /* AVCODEC_X86_HPELDSP_H */ diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 42e33416eb..bcae22fa44 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -48,10 +48,6 @@ void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, @@ -86,8 +82,6 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, @@ -95,15 +89,6 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); - #define avg_pixels8_mmx ff_avg_pixels8_mmx #define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx #define avg_pixels16_mmx ff_avg_pixels16_mmx diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c index 69480597b1..5bd9026ef9 100644 --- a/libavcodec/x86/rv40dsp_init.c +++ b/libavcodec/x86/rv40dsp_init.c @@ -32,6 +32,13 @@ #include "libavutil/x86/cpu.h" #include "hpeldsp.h" +#define DEFINE_FN(op, size, insn) \ +static void op##_rv40_qpel##size##_mc33_##insn(uint8_t *dst, uint8_t *src, \ + ptrdiff_t stride) \ +{ \ + ff_##op##_pixels##size##_xy2_##insn(dst, src, stride, size); \ +} + #if HAVE_YASM void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); @@ -186,30 +193,24 @@ QPEL_FUNCS_SET (OP, 3, 1, OPT) \ QPEL_FUNCS_SET (OP, 3, 2, OPT) /** @} */ +DEFINE_FN(put, 8, ssse3) + +DEFINE_FN(put, 16, sse2) +DEFINE_FN(put, 16, ssse3) + +DEFINE_FN(avg, 8, mmxext) +DEFINE_FN(avg, 8, ssse3) + +DEFINE_FN(avg, 16, sse2) +DEFINE_FN(avg, 16, ssse3) #endif /* HAVE_YASM */ #if HAVE_MMX_INLINE -static void put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, - ptrdiff_t stride) -{ - ff_put_pixels8_xy2_mmx(dst, src, stride, 8); -} -static void put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, - ptrdiff_t stride) -{ - ff_put_pixels16_xy2_mmx(dst, src, stride, 16); -} -static void avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, - ptrdiff_t stride) -{ - ff_avg_pixels8_xy2_mmx(dst, src, stride, 8); -} -static void avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, - ptrdiff_t stride) -{ - ff_avg_pixels16_xy2_mmx(dst, src, stride, 16); -} -#endif /* HAVE_MMX_INLINE */ +DEFINE_FN(put, 8, mmx) +DEFINE_FN(avg, 8, mmx) +DEFINE_FN(put, 16, mmx) +DEFINE_FN(avg, 16, mmx) +#endif av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) { @@ -240,6 +241,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) #endif } if (EXTERNAL_MMXEXT(cpu_flags)) { + c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_mmxext; c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmxext; c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmxext; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext; @@ -251,6 +253,8 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) #endif } if (EXTERNAL_SSE2(cpu_flags)) { + c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_sse2; + c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_sse2; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; @@ -259,6 +263,10 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c) QPEL_MC_SET(avg_, _sse2) } if (EXTERNAL_SSSE3(cpu_flags)) { + c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_ssse3; + c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_ssse3; + c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_ssse3; + c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_ssse3; c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;