x86: hpeldsp: kill hpeldsp_mmx.c

before:
1987 decicycles in 8_x2, 262121 runs, 23 skips

after:
1902 decicycles in 8_x2, 262112 runs, 32 skips

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Christophe Gisquet 2014-05-22 17:48:16 +00:00 committed by Michael Niedermayer
parent bda8ceb9f8
commit f8de35ebc4
4 changed files with 41 additions and 55 deletions

View File

@ -54,7 +54,6 @@ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/idct_sse2_xvid.o \ x86/idct_sse2_xvid.o \
x86/simple_idct.o x86/simple_idct.o
MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o

View File

@ -340,27 +340,58 @@ AVG_PIXELS8
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
; PAVGB_MMX src, reg, tmp, mask
; Byte-wise average for plain MMX, used where the pavgb instruction is not
; available (the call sites are guarded by notcpuflag(mmxext)).
;   %1 = first operand (the call sites pass a memory reference)
;   %2 = register holding the second operand; overwritten with scratch data
;   %3 = temporary register; receives the computed value
;   %4 = register holding the mask applied before the shift; the caller
;        builds it with pcmpeqd/paddb, which yields 0xFE in every byte
; Computes  (%1 | %2) - (((%1 ^ %2) & %4) >> 1)  per byte, which with a 0xFE
; mask is the average rounded up, (a + b + 1) >> 1.
%macro PAVGB_MMX 4
; tmp = a
movu %3, %1
; tmp = a | b
por %3, %2
; reg = a ^ b
pxor %2, %1
; clear bit 0 of every byte so the 64-bit shift below cannot move a bit
; from one byte into its neighbour
pand %2, %4
; reg = ((a ^ b) & mask) >> 1, done as a single quadword shift
psrlq %2, 1
; tmp = (a | b) - reg
psubb %3, %2
; NOTE(review): SWAP is not defined in this view (presumably the x86inc
; register-renaming macro) - after it the name given as %2 refers to the
; result, which is how the call sites use it; confirm.
SWAP %2, %3
%endmacro
%macro AVG_PIXELS8_X2 0 %macro AVG_PIXELS8_X2 0
cglobal avg_pixels8_x2, 4,5 cglobal avg_pixels8_x2, 4,5
lea r4, [r2*2] lea r4, [r2*2]
%if notcpuflag(mmxext)
pcmpeqd m5, m5
paddb m5, m5
%endif
.loop: .loop:
mova m0, [r1] mova m0, [r1]
mova m2, [r1+r2] mova m2, [r1+r2]
%if notcpuflag(mmxext)
PAVGB_MMX [r1+1], m0, m3, m5
PAVGB_MMX [r1+r2+1], m2, m4, m5
PAVGB_MMX [r0], m0, m3, m5
PAVGB_MMX [r0+r2], m2, m4, m5
%else
PAVGB m0, [r1+1] PAVGB m0, [r1+1]
PAVGB m2, [r1+r2+1] PAVGB m2, [r1+r2+1]
PAVGB m0, [r0] PAVGB m0, [r0]
PAVGB m2, [r0+r2] PAVGB m2, [r0+r2]
%endif
add r1, r4 add r1, r4
mova [r0], m0 mova [r0], m0
mova [r0+r2], m2 mova [r0+r2], m2
mova m0, [r1] mova m0, [r1]
mova m2, [r1+r2] mova m2, [r1+r2]
%if notcpuflag(mmxext)
PAVGB_MMX [r1+1], m0, m3, m5
PAVGB_MMX [r1+r2+1], m2, m4, m5
%else
PAVGB m0, [r1+1] PAVGB m0, [r1+1]
PAVGB m2, [r1+r2+1] PAVGB m2, [r1+r2+1]
%endif
add r0, r4 add r0, r4
add r1, r4 add r1, r4
%if notcpuflag(mmxext)
PAVGB_MMX [r0], m0, m3, m5
PAVGB_MMX [r0+r2], m2, m4, m5
%else
PAVGB m0, [r0] PAVGB m0, [r0]
PAVGB m2, [r0+r2] PAVGB m2, [r0+r2]
%endif
mova [r0], m0 mova [r0], m0
mova [r0+r2], m2 mova [r0+r2], m2
add r0, r4 add r0, r4
@ -369,6 +400,8 @@ cglobal avg_pixels8_x2, 4,5
REP_RET REP_RET
%endmacro %endmacro
INIT_MMX mmx
AVG_PIXELS8_X2
INIT_MMX mmxext INIT_MMX mmxext
AVG_PIXELS8_X2 AVG_PIXELS8_X2
INIT_MMX 3dnow INIT_MMX 3dnow

View File

@ -190,7 +190,14 @@ static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx); SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx);
SET_HPEL_FUNCS(put, [1], 8, mmx); SET_HPEL_FUNCS(put, [1], 8, mmx);
SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx); SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
SET_HPEL_FUNCS(avg, [1], 8, mmx); if (HAVE_MMX_EXTERNAL) {
c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
}
#if HAVE_MMX_INLINE
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
#endif
} }
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags) static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)

View File

@ -1,53 +0,0 @@
/*
* MMX-optimized avg/put pixel routines
*
* Copyright (c) 2001 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stddef.h>
#include <stdint.h>
#include "config.h"
#include "hpeldsp.h"
#include "inline_asm.h"
#if HAVE_MMX_INLINE
/**
 * Blend a horizontally interpolated 8-byte-wide source into the destination,
 * one row per loop iteration, using MMX inline assembly.
 *
 * For each row the code loads 8 bytes at pixels and 8 bytes at pixels + 1,
 * combines them with PAVGB_MMX, combines that result with the 8 bytes
 * currently stored at block, and writes the 8-byte result back to block.
 *
 * NOTE(review): PAVGB_MMX, MOVQ_BFE and JUMPALIGN are defined outside this
 * file (presumably in inline_asm.h); the descriptions below are inferred from
 * how they are used here - confirm against their definitions.
 *
 * @param block     destination rows; read and written (8 bytes per row)
 * @param pixels    source rows; bytes at offsets 0..8 of each row are read
 * @param line_size byte offset added to both pointers after each row
 * @param h         number of rows; the do/while loop runs the body before
 *                  testing the counter, so h must be at least 1
 */
void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h)
{
/* Presumably loads the 0xFE-per-byte mask needed by PAVGB_MMX into mm6 -
 * TODO confirm. It is set once and reused by every row. */
MOVQ_BFE(mm6);
/* Presumably emits an alignment directive for the loop entry - TODO confirm. */
JUMPALIGN();
do {
__asm__ volatile(
/* mm0 = 8 source bytes starting at pixels */
"movq %1, %%mm0 \n\t"
/* mm1 = 8 source bytes starting at pixels + 1 (displacement prepended to
 * the memory operand) */
"movq 1%1, %%mm1 \n\t"
/* mm3 = 8 bytes currently in the destination row */
"movq %0, %%mm3 \n\t"
/* mm2 = combination of mm0 and mm1 (third argument appears to be the
 * output, since mm2 is consumed next without any other write) */
PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
/* mm0 = combination of the destination bytes (mm3) and mm2 */
PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
/* store the 8 result bytes back to the destination row */
"movq %%mm0, %0 \n\t"
/* The operands name only *block and *pixels (a single byte each) while the
 * asm touches 8 and 9 bytes; the "memory" clobber is what tells the
 * compiler about the wider accesses. */
:"+m"(*block)
:"m"(*pixels)
:"memory");
/* advance both pointers to the next row */
pixels += line_size;
block += line_size;
} while (--h);
}
#endif /* HAVE_MMX_INLINE */