Fix and restructure fastmemcpybench. It is now one binary that runs all
available memcpy variants and prints benchmark results about them.


git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@28929 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
diego 2009-03-10 10:05:09 +00:00
parent cae6488255
commit 143a63fd9e
3 changed files with 140 additions and 45 deletions

View File

@ -986,7 +986,7 @@ tests: $(addsuffix $(EXESUF),$(TESTS))
testsclean:
-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 movinfo netstream subrip vivodump)
TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 fastmemcpybench movinfo netstream subrip vivodump)
ifdef ARCH_X86
TOOLS += TOOLS/modify_reg
@ -999,7 +999,7 @@ alltools: $(addsuffix $(EXESUF),$(ALLTOOLS))
toolsclean:
-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
-rm -f TOOLS/fastmem-* TOOLS/realcodecs/*.so.6.0
-rm -f TOOLS/realcodecs/*.so.6.0
TOOLS/bmovl-test$(EXESUF): -lSDL_image
@ -1016,27 +1016,11 @@ TOOLS/vivodump$(EXESUF): TOOLS/vivodump.c
TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
TOOLS/fastmem-c$(EXESUF): CFLAGS += -DHAVE_MMX=0 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"C\"
TOOLS/fastmem-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MMX\"
TOOLS/fastmem-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"K6\"
TOOLS/fastmem-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"K7\"
TOOLS/fastmem-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"SSE\"
TOOLS/fastmem-mga-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-MMX\" -DCONFIG_MGA
TOOLS/fastmem-mga-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-K6\" -DCONFIG_MGA
TOOLS/fastmem-mga-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"MGA-K7\" -DCONFIG_MGA
TOOLS/fastmem-mga-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"MGA-SSE\" -DCONFIG_MGA
fastmemcpybench: $(addsuffix $(EXESUF),$(addprefix TOOLS/fastmem-,c mmx k6 k7 sse mga-mmx mga-k6 mga-k7 mga-sse))
TOOLS/fastmem-%$(EXESUF): TOOLS/fastmemcpybench.c libvo/aclib.c
$(CC) $(CFLAGS) -o $@ $^
REAL_SRCS = $(wildcard TOOLS/realcodecs/*.c)
REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
realcodecs: $(REAL_TARGETS)
fastmemcpybench realcodecs: CFLAGS += -g
realcodecs: CFLAGS += -g
%.so.6.0: %.o
ld -shared -o $@ $< -ldl -lc

View File

@ -1,21 +0,0 @@
# Run every fastmem benchmark binary from the current directory, one after
# another, pausing two seconds after each run.
# `sync` is issued once up front, presumably so that pending disk writes do
# not disturb the measurements -- confirm with the original author.
sync
sleep 2
for bench in \
    fastmem-k6 fastmem-k7 fastmem-mmx fastmem-sse fastmem-c \
    fastmem2-k6 fastmem2-k7 fastmem2-mmx fastmem2-sse
do
    # A failing or missing binary does not stop the remaining runs.
    "./$bench"
    sleep 2
done

View File

@ -7,8 +7,6 @@
* was not confirmed through testing.
*/
/* According to Uoti this code is broken. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -18,7 +16,92 @@
#include <sys/mman.h>
#include <sys/time.h>
#include <inttypes.h>
#include "libvo/fastmemcpy.h"
#include "config.h"
#include "cpudetect.h"
#define BLOCK_SIZE 4096
#define CONFUSION_FACTOR 0
/*
 * Decide which memcpy variants get compiled into this benchmark.
 * The HAVE_* values tested here are the ones in effect at this point
 * (presumably from config.h); each true value enables one COMPILE_* flag.
 */
#if HAVE_MMX
#define COMPILE_MMX
#endif
#if HAVE_MMX2
#define COMPILE_MMX2
#endif
#if HAVE_AMD3DNOW
#define COMPILE_AMD3DNOW
#endif
#if HAVE_SSE
#define COMPILE_SSE
#endif

/*
 * Instantiate libvo/aclib_template.c once per enabled variant.
 * Every section below forces the HAVE_* macros to the feature set of that
 * variant and defines RENAME() to append a variant suffix, so the copies
 * can coexist in one binary (the benchmark code calls them as
 * fast_memcpy_MMX, fast_memcpy_MMX2, fast_memcpy_3DNow and fast_memcpy_SSE).
 */

/* Plain MMX variant. */
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define HAVE_SSE 0
#define HAVE_SSE2 0
#define RENAME(a) a ## _MMX
#include "libvo/aclib_template.c"
#endif

/* MMX + MMX2 variant. */
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define HAVE_AMD3DNOW 0
#define HAVE_SSE 0
#define HAVE_SSE2 0
#define RENAME(a) a ## _MMX2
#include "libvo/aclib_template.c"
#endif

/* MMX + 3DNow! variant. */
#ifdef COMPILE_AMD3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define HAVE_SSE 0
#define HAVE_SSE2 0
#define RENAME(a) a ## _3DNow
#include "libvo/aclib_template.c"
#endif

/* MMX + MMX2 + SSE variant (HAVE_SSE2 is switched on as well). */
#ifdef COMPILE_SSE
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_SSE
#undef HAVE_SSE2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define HAVE_AMD3DNOW 0
#define HAVE_SSE 1
#define HAVE_SSE2 1
#define RENAME(a) a ## _SSE
#include "libvo/aclib_template.c"
#endif

/*
 * Restore the HAVE_* macros that the sections above overwrote.
 *
 * Without this, the macros keep the values of whichever section was
 * included last.  Code further down selects the benchmarks to run with
 * "#if HAVE_MMX", "#if HAVE_AMD3DNOW", "#if HAVE_MMX2" and "#if HAVE_SSE",
 * so e.g. a build with both 3DNow! and SSE enabled would end up with
 * HAVE_AMD3DNOW == 0 and silently skip the 3DNow! benchmark even though
 * fast_memcpy_3DNow was compiled.
 *
 * COMPILE_x is defined exactly when HAVE_x was true before the sections
 * ran, so it is used to rebuild the original truth value.  HAVE_SSE2 has
 * no COMPILE_* counterpart and is left as the last section set it.
 */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_SSE
#ifdef COMPILE_MMX
#define HAVE_MMX 1
#else
#define HAVE_MMX 0
#endif
#ifdef COMPILE_MMX2
#define HAVE_MMX2 1
#else
#define HAVE_MMX2 0
#endif
#ifdef COMPILE_AMD3DNOW
#define HAVE_AMD3DNOW 1
#else
#define HAVE_AMD3DNOW 0
#endif
#ifdef COMPILE_SSE
#define HAVE_SSE 1
#else
#define HAVE_SSE 0
#endif
//#define ARR_SIZE 100000
#define ARR_SIZE (1024*768*2)
@ -114,11 +197,60 @@ int main(void)
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
fast_memcpy(marr1, marr2, ARR_SIZE - 16);
memcpy(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
#if HAVE_MMX
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
#endif
#if HAVE_AMD3DNOW
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
#endif
#if HAVE_MMX2
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
#endif
#if HAVE_SSE
t = GetTimer();
v1 = read_tsc();
for (i = 0; i < 100; i++)
fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
v2 = read_tsc();
t = GetTimer() - t;
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
#endif
return 0;
}