From f8de35ebc4f2cf5802e990ce74b0a564b962687f Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Thu, 22 May 2014 17:48:16 +0000 Subject: [PATCH] x86: hpeldsp: kill hpeldsp_mmx.c before: 1987 decicycles in 8_x2, 262121 runs, 23 skips after: 1902 decicycles in 8_x2, 262112 runs, 32 skips Signed-off-by: Michael Niedermayer --- libavcodec/x86/Makefile | 1 - libavcodec/x86/hpeldsp.asm | 33 ++++++++++++++++++++++ libavcodec/x86/hpeldsp_init.c | 9 +++++- libavcodec/x86/hpeldsp_mmx.c | 53 ----------------------------------- 4 files changed, 41 insertions(+), 55 deletions(-) delete mode 100644 libavcodec/x86/hpeldsp_mmx.c diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index adf4843a61..9c39265f36 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -54,7 +54,6 @@ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ x86/idct_sse2_xvid.o \ x86/simple_idct.o MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o -MMX-OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_mmx.o MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index 171c77b928..fce434c4ad 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -340,27 +340,58 @@ AVG_PIXELS8 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +%macro PAVGB_MMX 4 + movu %3, %1 + por %3, %2 + pxor %2, %1 + pand %2, %4 + psrlq %2, 1 + psubb %3, %2 + SWAP %2, %3 +%endmacro + %macro AVG_PIXELS8_X2 0 cglobal avg_pixels8_x2, 4,5 lea r4, [r2*2] +%if notcpuflag(mmxext) + pcmpeqd m5, m5 + paddb m5, m5 +%endif .loop: mova m0, [r1] mova m2, [r1+r2] +%if notcpuflag(mmxext) + PAVGB_MMX [r1+1], m0, m3, m5 + PAVGB_MMX [r1+r2+1], m2, m4, m5 + PAVGB_MMX [r0], m0, m3, m5 + PAVGB_MMX [r0+r2], m2, m4, m5 +%else PAVGB m0, [r1+1] PAVGB m2, [r1+r2+1] PAVGB m0, [r0] PAVGB m2, [r0+r2] +%endif add r1, r4 mova [r0], m0 mova [r0+r2], m2 mova m0, [r1] mova m2, [r1+r2] 
+%if notcpuflag(mmxext) + PAVGB_MMX [r1+1], m0, m3, m5 + PAVGB_MMX [r1+r2+1], m2, m4, m5 +%else PAVGB m0, [r1+1] PAVGB m2, [r1+r2+1] +%endif add r0, r4 add r1, r4 +%if notcpuflag(mmxext) + PAVGB_MMX [r0], m0, m3, m5 + PAVGB_MMX [r0+r2], m2, m4, m5 +%else PAVGB m0, [r0] PAVGB m2, [r0+r2] +%endif mova [r0], m0 mova [r0+r2], m2 add r0, r4 @@ -369,6 +400,8 @@ cglobal avg_pixels8_x2, 4,5 REP_RET %endmacro +INIT_MMX mmx +AVG_PIXELS8_X2 INIT_MMX mmxext AVG_PIXELS8_X2 INIT_MMX 3dnow diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 4a1b3cb80e..95de9fe4ef 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -190,7 +190,14 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags) SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx); SET_HPEL_FUNCS(put, [1], 8, mmx); SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx); - SET_HPEL_FUNCS(avg, [1], 8, mmx); + if (HAVE_MMX_EXTERNAL) { + c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx; + c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx; + } +#if HAVE_MMX_INLINE + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; + c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx; +#endif } static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags) diff --git a/libavcodec/x86/hpeldsp_mmx.c b/libavcodec/x86/hpeldsp_mmx.c deleted file mode 100644 index 039ba776a2..0000000000 --- a/libavcodec/x86/hpeldsp_mmx.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * MMX-optimized avg/put pixel routines - * - * Copyright (c) 2001 Fabrice Bellard - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stddef.h> -#include <stdint.h> - -#include "config.h" -#include "hpeldsp.h" -#include "inline_asm.h" - -#if HAVE_MMX_INLINE - -void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h) -{ - MOVQ_BFE(mm6); - JUMPALIGN(); - do { - __asm__ volatile( - "movq %1, %%mm0 \n\t" - "movq 1%1, %%mm1 \n\t" - "movq %0, %%mm3 \n\t" - PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6) - PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) - "movq %%mm0, %0 \n\t" - :"+m"(*block) - :"m"(*pixels) - :"memory"); - pixels += line_size; - block += line_size; - } while (--h); -} - -#endif /* HAVE_MMX_INLINE */