mirror of https://github.com/mpv-player/mpv
Fix and restructure fastmemcpybench. It is now one binary that runs all
available memcpy variants and prints benchmark results about them.

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@28929 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
cae6488255
commit
143a63fd9e
22
Makefile
22
Makefile
|
@ -986,7 +986,7 @@ tests: $(addsuffix $(EXESUF),$(TESTS))
|
||||||
testsclean:
|
testsclean:
|
||||||
-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
|
-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
|
||||||
|
|
||||||
TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 movinfo netstream subrip vivodump)
|
TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 fastmemcpybench movinfo netstream subrip vivodump)
|
||||||
|
|
||||||
ifdef ARCH_X86
|
ifdef ARCH_X86
|
||||||
TOOLS += TOOLS/modify_reg
|
TOOLS += TOOLS/modify_reg
|
||||||
|
@ -999,7 +999,7 @@ alltools: $(addsuffix $(EXESUF),$(ALLTOOLS))
|
||||||
|
|
||||||
toolsclean:
|
toolsclean:
|
||||||
-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
|
-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
|
||||||
-rm -f TOOLS/fastmem-* TOOLS/realcodecs/*.so.6.0
|
-rm -f TOOLS/realcodecs/*.so.6.0
|
||||||
|
|
||||||
TOOLS/bmovl-test$(EXESUF): -lSDL_image
|
TOOLS/bmovl-test$(EXESUF): -lSDL_image
|
||||||
|
|
||||||
|
@ -1016,27 +1016,11 @@ TOOLS/vivodump$(EXESUF): TOOLS/vivodump.c
|
||||||
TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
|
TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
|
||||||
$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
|
$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
|
||||||
|
|
||||||
TOOLS/fastmem-c$(EXESUF): CFLAGS += -DHAVE_MMX=0 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"C\"
|
|
||||||
TOOLS/fastmem-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MMX\"
|
|
||||||
TOOLS/fastmem-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"K6\"
|
|
||||||
TOOLS/fastmem-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"K7\"
|
|
||||||
TOOLS/fastmem-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"SSE\"
|
|
||||||
TOOLS/fastmem-mga-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-MMX\" -DCONFIG_MGA
|
|
||||||
TOOLS/fastmem-mga-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-K6\" -DCONFIG_MGA
|
|
||||||
TOOLS/fastmem-mga-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"MGA-K7\" -DCONFIG_MGA
|
|
||||||
TOOLS/fastmem-mga-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"MGA-SSE\" -DCONFIG_MGA
|
|
||||||
|
|
||||||
fastmemcpybench: $(addsuffix $(EXESUF),$(addprefix TOOLS/fastmem-,c mmx k6 k7 sse mga-mmx mga-k6 mga-k7 mga-sse))
|
|
||||||
|
|
||||||
TOOLS/fastmem-%$(EXESUF): TOOLS/fastmemcpybench.c libvo/aclib.c
|
|
||||||
$(CC) $(CFLAGS) -o $@ $^
|
|
||||||
|
|
||||||
REAL_SRCS = $(wildcard TOOLS/realcodecs/*.c)
|
REAL_SRCS = $(wildcard TOOLS/realcodecs/*.c)
|
||||||
REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
|
REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
|
||||||
|
|
||||||
realcodecs: $(REAL_TARGETS)
|
realcodecs: $(REAL_TARGETS)
|
||||||
|
realcodecs: CFLAGS += -g
|
||||||
fastmemcpybench realcodecs: CFLAGS += -g
|
|
||||||
|
|
||||||
%.so.6.0: %.o
|
%.so.6.0: %.o
|
||||||
ld -shared -o $@ $< -ldl -lc
|
ld -shared -o $@ $< -ldl -lc
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
|
|
||||||
sync
|
|
||||||
sleep 2
|
|
||||||
./fastmem-k6
|
|
||||||
sleep 2
|
|
||||||
./fastmem-k7
|
|
||||||
sleep 2
|
|
||||||
./fastmem-mmx
|
|
||||||
sleep 2
|
|
||||||
./fastmem-sse
|
|
||||||
sleep 2
|
|
||||||
./fastmem-c
|
|
||||||
sleep 2
|
|
||||||
./fastmem2-k6
|
|
||||||
sleep 2
|
|
||||||
./fastmem2-k7
|
|
||||||
sleep 2
|
|
||||||
./fastmem2-mmx
|
|
||||||
sleep 2
|
|
||||||
./fastmem2-sse
|
|
||||||
sleep 2
|
|
|
@ -7,8 +7,6 @@
|
||||||
* was not confirmed through testing.
|
* was not confirmed through testing.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* According to Uoti this code is broken. */
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -18,7 +16,92 @@
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include "libvo/fastmemcpy.h"
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "cpudetect.h"
|
||||||
|
|
||||||
|
#define BLOCK_SIZE 4096
|
||||||
|
#define CONFUSION_FACTOR 0
|
||||||
|
|
||||||
|
#if HAVE_MMX
|
||||||
|
#define COMPILE_MMX
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_MMX2
|
||||||
|
#define COMPILE_MMX2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_AMD3DNOW
|
||||||
|
#define COMPILE_AMD3DNOW
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_SSE
|
||||||
|
#define COMPILE_SSE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef COMPILE_MMX
|
||||||
|
#undef RENAME
|
||||||
|
#undef HAVE_MMX
|
||||||
|
#undef HAVE_MMX2
|
||||||
|
#undef HAVE_AMD3DNOW
|
||||||
|
#undef HAVE_SSE
|
||||||
|
#undef HAVE_SSE2
|
||||||
|
#define HAVE_MMX 1
|
||||||
|
#define HAVE_MMX2 0
|
||||||
|
#define HAVE_AMD3DNOW 0
|
||||||
|
#define HAVE_SSE 0
|
||||||
|
#define HAVE_SSE2 0
|
||||||
|
#define RENAME(a) a ## _MMX
|
||||||
|
#include "libvo/aclib_template.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef COMPILE_MMX2
|
||||||
|
#undef RENAME
|
||||||
|
#undef HAVE_MMX
|
||||||
|
#undef HAVE_MMX2
|
||||||
|
#undef HAVE_AMD3DNOW
|
||||||
|
#undef HAVE_SSE
|
||||||
|
#undef HAVE_SSE2
|
||||||
|
#define HAVE_MMX 1
|
||||||
|
#define HAVE_MMX2 1
|
||||||
|
#define HAVE_AMD3DNOW 0
|
||||||
|
#define HAVE_SSE 0
|
||||||
|
#define HAVE_SSE2 0
|
||||||
|
#define RENAME(a) a ## _MMX2
|
||||||
|
#include "libvo/aclib_template.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef COMPILE_AMD3DNOW
|
||||||
|
#undef RENAME
|
||||||
|
#undef HAVE_MMX
|
||||||
|
#undef HAVE_MMX2
|
||||||
|
#undef HAVE_AMD3DNOW
|
||||||
|
#undef HAVE_SSE
|
||||||
|
#undef HAVE_SSE2
|
||||||
|
#define HAVE_MMX 1
|
||||||
|
#define HAVE_MMX2 0
|
||||||
|
#define HAVE_AMD3DNOW 1
|
||||||
|
#define HAVE_SSE 0
|
||||||
|
#define HAVE_SSE2 0
|
||||||
|
#define RENAME(a) a ## _3DNow
|
||||||
|
#include "libvo/aclib_template.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef COMPILE_SSE
|
||||||
|
#undef RENAME
|
||||||
|
#undef HAVE_MMX
|
||||||
|
#undef HAVE_MMX2
|
||||||
|
#undef HAVE_AMD3DNOW
|
||||||
|
#undef HAVE_SSE
|
||||||
|
#undef HAVE_SSE2
|
||||||
|
#define HAVE_MMX 1
|
||||||
|
#define HAVE_MMX2 1
|
||||||
|
#define HAVE_AMD3DNOW 0
|
||||||
|
#define HAVE_SSE 1
|
||||||
|
#define HAVE_SSE2 1
|
||||||
|
#define RENAME(a) a ## _SSE
|
||||||
|
#include "libvo/aclib_template.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
//#define ARR_SIZE 100000
|
//#define ARR_SIZE 100000
|
||||||
#define ARR_SIZE (1024*768*2)
|
#define ARR_SIZE (1024*768*2)
|
||||||
|
@ -114,11 +197,60 @@ int main(void)
|
||||||
t = GetTimer();
|
t = GetTimer();
|
||||||
v1 = read_tsc();
|
v1 = read_tsc();
|
||||||
for (i = 0; i < 100; i++)
|
for (i = 0; i < 100; i++)
|
||||||
fast_memcpy(marr1, marr2, ARR_SIZE - 16);
|
memcpy(marr1, marr2, ARR_SIZE - 16);
|
||||||
v2 = read_tsc();
|
v2 = read_tsc();
|
||||||
t = GetTimer() - t;
|
t = GetTimer() - t;
|
||||||
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
||||||
printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
||||||
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
||||||
|
|
||||||
|
#if HAVE_MMX
|
||||||
|
t = GetTimer();
|
||||||
|
v1 = read_tsc();
|
||||||
|
for (i = 0; i < 100; i++)
|
||||||
|
fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
|
||||||
|
v2 = read_tsc();
|
||||||
|
t = GetTimer() - t;
|
||||||
|
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
||||||
|
printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
||||||
|
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_AMD3DNOW
|
||||||
|
t = GetTimer();
|
||||||
|
v1 = read_tsc();
|
||||||
|
for (i = 0; i < 100; i++)
|
||||||
|
fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
|
||||||
|
v2 = read_tsc();
|
||||||
|
t = GetTimer() - t;
|
||||||
|
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
||||||
|
printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
||||||
|
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_MMX2
|
||||||
|
t = GetTimer();
|
||||||
|
v1 = read_tsc();
|
||||||
|
for (i = 0; i < 100; i++)
|
||||||
|
fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
|
||||||
|
v2 = read_tsc();
|
||||||
|
t = GetTimer() - t;
|
||||||
|
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
||||||
|
printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
||||||
|
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if HAVE_SSE
|
||||||
|
t = GetTimer();
|
||||||
|
v1 = read_tsc();
|
||||||
|
for (i = 0; i < 100; i++)
|
||||||
|
fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
|
||||||
|
v2 = read_tsc();
|
||||||
|
t = GetTimer() - t;
|
||||||
|
// ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
|
||||||
|
printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
|
||||||
|
100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue