applied MMX2 memcpy() patch by Nick Kurshev

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@351 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
arpi_esp 2001-04-11 12:47:45 +00:00
parent 338538f504
commit 506063f11a
9 changed files with 129 additions and 0 deletions

17
configure vendored
View File

@ -70,6 +70,7 @@ usage: $0 [options]
params:
--cc use this C compiler to build MPlayer [gcc]
--enable-mmx build with mmx support [autodetect]
--enable-mmx2 build with mmx2 support (PIII, Athlon) [autodetect]
--enable-3dnow build with 3dnow! support [autodetect]
--enable-sse build with sse support [autodetect]
--enable-gl build with OpenGL render support [autodetect]
@ -153,6 +154,7 @@ pmodel=`cat /proc/cpuinfo | grep "model$TAB" | cut -d ':' -f 2 | cut -d ' ' -f 2
pstepping=`cat /proc/cpuinfo | grep 'stepping' | cut -d ':' -f 2 | cut -d ' ' -f 2`
_mmx=no
_mmx2=no
_3dnow=no
_mtrr=no
_sse=no
@ -193,6 +195,9 @@ for i in `echo $pparam`; do
mmx)
_mmx=yes
;;
mmxext)
_mmx2=yes
;;
mtrr)
_mtrr=yes
;;
@ -444,6 +449,9 @@ do
--enable-mmx)
_mmx=yes
;;
--enable-mmx2)
_mmx2=yes
;;
--enable-mtrr)
_mtrr=yes
;;
@ -506,6 +514,7 @@ do
;;
--disable-mmx)
_mmx=no
_mmx2=no
;;
--disable-mtrr)
_mtrr=no
@ -573,6 +582,7 @@ echo "Checking for cpu vendor ... $pvendor ( $pfamily:$pmodel:$pstepping )"
echo "Checking for cpu type ... $pname"
echo "Optimizing to ... $proc"
echo "Checking for mmx support ... $_mmx"
echo "Checking for mmx2 support ... $_mmx2"
echo "Checking for 3dnow support ... $_3dnow"
echo "Checking for sse support ... $_sse"
echo "Checking for mtrr support ... $_mtrr"
@ -675,6 +685,12 @@ else
_mmx='#undef HAVE_MMX'
fi
# Turn the yes/no MMX2 detection result into the preprocessor line that is
# substituted later through $_mmx2.  Note that _mmx2 is overwritten in place
# (the 3dnow check below stores its line in a separate variable, _3dnowm),
# so after this point _mmx2 no longer holds "yes" or "no".
if [ "$_mmx2" = "yes" ]; then
_mmx2='#define HAVE_MMX2'
else
_mmx2='#undef HAVE_MMX2'
fi
if [ $_3dnow = yes ]; then
_3dnowm='#define HAVE_3DNOW'
else
@ -851,6 +867,7 @@ $_termcap
$_mlib // available only on solaris
$_3dnowm // only define if you have 3DNOW (AMD k6-2, AMD Athlon, iDT WinChip, etc.)
$_mmx // only define if you have MMX
$_mmx2 // only define if you have MMX2
$_ssem // only define if you have SSE (Intel Pentium III or Celeron II)
/* libvo options */

View File

@ -27,6 +27,104 @@
#ifndef _MMX_H
#define _MMX_H
/*
This part of the code was taken from Linux-2.4.3 and slightly modified
for the MMX2 instruction set. I did this because Linux uses page-aligned
blocks, whereas mplayer uses weakly ordered data, which the original sources
cannot speed up. Only using prefetch and movntq together has an effect!
If you have questions, please contact me: Nick Kurshev: nickols_k@mail.ru.
*/
#ifndef HAVE_MMX2
//static inline void * __memcpy(void * to, const void * from, unsigned n)
inline static void * memcpy(void * to, const void * from, unsigned n)
{
int d0, d1, d2;
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}
#else
//inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
/*
 * Copy the n bytes at `from` to `to` with "rep movsl" followed by a 0-3 byte
 * remainder.  Private helper of the MMX2 memcpy() below: it handles blocks
 * that are too small for the streaming loop as well as the tail that the
 * loop leaves behind.  Self-contained on purpose, because the generic
 * memcpy() replacement is not compiled when HAVE_MMX2 is defined.
 */
inline static void mmx2_copy_small(void * to, const void * from, unsigned n)
{
    /* Dummy outputs: they only tell the compiler that the count,
       destination and source registers are modified by the asm. */
    long d0, d1, d2;

    __asm__ __volatile__(
        "rep ; movsl\n\t"           /* copy n/4 32-bit words */
        "testb $2,%b4\n\t"          /* is bit 1 of n set? */
        "je 1f\n\t"
        "movsw\n"                   /* yes: copy two more bytes */
        "1:\ttestb $1,%b4\n\t"      /* is bit 0 of n set? */
        "je 2f\n\t"
        "movsb\n"                   /* yes: copy the final byte */
        "2:"
        : "=&c" (d0), "=&D" (d1), "=&S" (d2)
        : "0" ((long) (n / 4)), "q" (n), "1" ((long) to), "2" ((long) from)
        : "memory");
}

/*
 * memcpy() replacement used when HAVE_MMX2 is defined.
 *
 * Blocks of at least 0x200 bytes are copied 64 bytes at a time through the
 * MMX registers, combining software prefetch of the source with non-temporal
 * (movntq) stores to the destination; whatever is left, and every block
 * smaller than 0x200 bytes, is copied by mmx2_copy_small().
 *
 *   to    destination buffer
 *   from  source buffer
 *   n     number of bytes to copy
 *
 * Returns the original value of `to`.
 *
 * Notes:
 *  - prefetchnta is used instead of the 3DNow! "prefetch" opcode because it
 *    belongs to the same instruction-set extension as movntq and sfence.
 *  - The .fixup/__ex_table sections of the kernel original are gone: they
 *    rely on the kernel's exception-table handling, which user-space code
 *    does not have.
 */
inline static void * memcpy(void * to, const void * from, unsigned n)
{
    void *ret = to;
    const unsigned char *src = from;
    unsigned char *dst = to;

    if (n >= 0x200) {
        unsigned blocks = n >> 6;   /* number of whole 64-byte blocks */

        /* Start fetching the first few source cache lines ahead of the loop. */
        __asm__ __volatile__ (
            "prefetchnta (%0)\n"
            "prefetchnta 64(%0)\n"
            "prefetchnta 128(%0)\n"
            "prefetchnta 192(%0)\n"
            "prefetchnta 256(%0)\n"
            : : "r" (src));

        for (; blocks > 0; blocks--) {
            __asm__ __volatile__ (
                "prefetchnta 320(%0)\n"
                "movq (%0), %%mm0\n"
                "movq 8(%0), %%mm1\n"
                "movq 16(%0), %%mm2\n"
                "movq 24(%0), %%mm3\n"
                "movntq %%mm0, (%1)\n"
                "movntq %%mm1, 8(%1)\n"
                "movntq %%mm2, 16(%1)\n"
                "movntq %%mm3, 24(%1)\n"
                "movq 32(%0), %%mm0\n"
                "movq 40(%0), %%mm1\n"
                "movq 48(%0), %%mm2\n"
                "movq 56(%0), %%mm3\n"
                "movntq %%mm0, 32(%1)\n"
                "movntq %%mm1, 40(%1)\n"
                "movntq %%mm2, 48(%1)\n"
                "movntq %%mm3, 56(%1)\n"
                : : "r" (src), "r" (dst) : "memory");
            src += 64;
            dst += 64;
        }

        /* Non-temporal stores are weakly ordered: fence them before any
           later store, then leave MMX state so the FPU is usable again. */
        __asm__ __volatile__ ("sfence\n\temms" ::: "memory");

        n &= 63;                    /* bytes not covered by the loop */
    }

    /*
     * Copy the tail of a large block, or the whole of a small one.
     */
    mmx2_copy_small(dst, src, n);
    return ret;
}
#endif
/* Warning: at this writing, the version of GAS packaged
with most Linux distributions does not handle the

View File

@ -49,6 +49,8 @@ LIBVO_EXTERN(3dfx)
#include "drivers/3dfx.h"
#include "mmx.h"
static vo_info_t vo_info =
{
"3dfx (/dev/3dfx)",

View File

@ -24,6 +24,8 @@
#include "yuv2rgb.h"
extern void rgb15to16_mmx(char *s0, char *d0, int count);
#include "mmx.h"
LIBVO_EXTERN(fbdev)
static vo_info_t vo_info = {

View File

@ -19,6 +19,8 @@ LIBVO_EXTERN(odivx)
#include "../encore/encore.h"
#include "mmx.h"
static vo_info_t vo_info =
{
"OpenDivX AVI File writer",

View File

@ -68,6 +68,8 @@
#include "video_out.h"
#include "video_out_internal.h"
#include "mmx.h"
LIBVO_EXTERN(sdl)
//#include "log.h"

View File

@ -43,6 +43,8 @@ LIBVO_EXTERN(syncfb)
#include "drivers/syncfb/syncfb.h"
#include "mmx.h"
static vo_info_t vo_info =
{
"Matrox G200/G400 Synchronous framebuffer (/dev/syncfb)",

View File

@ -36,6 +36,8 @@ LIBVO_EXTERN( x11 )
#include "x11_common.h"
#include "mmx.h"
static vo_info_t vo_info =
{
"X11 ( XImage/Shm )",

View File

@ -28,6 +28,8 @@ LIBVO_EXTERN(xv)
#include "x11_common.h"
#include "mmx.h"
static vo_info_t vo_info =
{
"X11/Xv",