mirror of git://git.musl-libc.org/musl
enhance build process to allow selective -O3 optimization
the motivation for this patch is that the vast majority of libc is code that does not benefit at all from optimizations, but that certain components like string/memory operations can be major performance bottlenecks. at the same time, the old -falign-*=1 options are removed, since they were only beneficial for avoiding bloat when global -O3 was used, and in that case, they may have prevented some of the performance gains. to be the most useful, this patch will need further tuning. in particular, research is needed to determine which components should be built with -O3 by default, and it may be desirable to remove the hard-coded -O3 and instead allow more customization of the optimization level used for selected modules.
This commit is contained in:
parent
9578f0530a
commit
a80847d86a
3
Makefile
3
Makefile
|
@ -80,6 +80,9 @@ include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/
|
||||||
|
|
||||||
src/ldso/dynlink.lo: arch/$(ARCH)/reloc.h
|
src/ldso/dynlink.lo: arch/$(ARCH)/reloc.h
|
||||||
|
|
||||||
|
# Sources selected for speed optimization: every glob listed in
# OPTIMIZE_GLOBS is taken relative to src/ and expanded against the
# files that actually exist.
OPTIMIZE_SRCS = $(wildcard $(addprefix src/,$(OPTIMIZE_GLOBS)))

# Append -O3 to CFLAGS only for the .o and .lo objects of those sources.
$(patsubst %.c,%.o,$(OPTIMIZE_SRCS)) $(patsubst %.c,%.lo,$(OPTIMIZE_SRCS)): CFLAGS += -O3
|
||||||
|
|
||||||
%.o: $(ARCH)/%.s
|
%.o: $(ARCH)/%.s
|
||||||
$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
|
$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ System types:
|
||||||
--host=HOST same as --target
|
--host=HOST same as --target
|
||||||
|
|
||||||
Optional features:
|
Optional features:
|
||||||
|
--enable-optimize=... optimize listed components for speed over size [auto]
|
||||||
--enable-debug build with debugging information [disabled]
|
--enable-debug build with debugging information [disabled]
|
||||||
--enable-warnings build with recommended warnings flags [disabled]
|
--enable-warnings build with recommended warnings flags [disabled]
|
||||||
--enable-gcc-wrapper build musl-gcc toolchain wrapper [auto]
|
--enable-gcc-wrapper build musl-gcc toolchain wrapper [auto]
|
||||||
|
@ -104,6 +105,7 @@ fi
|
||||||
CFLAGS_C99FSE=
|
CFLAGS_C99FSE=
|
||||||
CFLAGS_AUTO=
|
CFLAGS_AUTO=
|
||||||
LDFLAGS_AUTO=
|
LDFLAGS_AUTO=
|
||||||
|
OPTIMIZE_GLOBS=
|
||||||
prefix=/usr/local/musl
|
prefix=/usr/local/musl
|
||||||
exec_prefix='$(prefix)'
|
exec_prefix='$(prefix)'
|
||||||
bindir='$(exec_prefix)/bin'
|
bindir='$(exec_prefix)/bin'
|
||||||
|
@ -111,6 +113,7 @@ libdir='$(prefix)/lib'
|
||||||
includedir='$(prefix)/include'
|
includedir='$(prefix)/include'
|
||||||
syslibdir='/lib'
|
syslibdir='/lib'
|
||||||
target=
|
target=
|
||||||
|
optimize=auto
|
||||||
debug=no
|
debug=no
|
||||||
warnings=no
|
warnings=no
|
||||||
shared=yes
|
shared=yes
|
||||||
|
@ -129,6 +132,9 @@ case "$arg" in
|
||||||
--disable-shared|--enable-shared=no) shared=no ;;
|
--disable-shared|--enable-shared=no) shared=no ;;
|
||||||
--enable-static|--enable-static=yes) static=yes ;;
|
--enable-static|--enable-static=yes) static=yes ;;
|
||||||
--disable-static|--enable-static=no) static=no ;;
|
--disable-static|--enable-static=no) static=no ;;
|
||||||
|
--enable-optimize) optimize=yes ;;
|
||||||
|
--enable-optimize=*) optimize=${arg#*=} ;;
|
||||||
|
--disable-optimize) optimize=no ;;
|
||||||
--enable-debug|--enable-debug=yes) debug=yes ;;
|
--enable-debug|--enable-debug=yes) debug=yes ;;
|
||||||
--disable-debug|--enable-debug=no) debug=no ;;
|
--disable-debug|--enable-debug=no) debug=no ;;
|
||||||
--enable-warnings|--enable-warnings=yes) warnings=yes ;;
|
--enable-warnings|--enable-warnings=yes) warnings=yes ;;
|
||||||
|
@ -230,14 +236,57 @@ tryflag CFLAGS_C99FSE -fexcess-precision=standard \
|
||||||
|| { test "$ARCH" = i386 && tryflag CFLAGS_C99FSE -ffloat-store ; }
|
|| { test "$ARCH" = i386 && tryflag CFLAGS_C99FSE -ffloat-store ; }
|
||||||
tryflag CFLAGS_C99FSE -frounding-math
|
tryflag CFLAGS_C99FSE -frounding-math
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Setup basic default CFLAGS: debug, optimization, and -pipe
|
# If debugging is explicitly enabled, don't auto-enable optimizations
|
||||||
#
|
#
|
||||||
if fnmatch '-O*|*\ -O*' "$CFLAGS_AUTO $CFLAGS" ; then :
|
if test "$debug" = yes ; then
|
||||||
else
|
CFLAGS_AUTO=-g
|
||||||
tryflag CFLAGS_AUTO -Os || tryflag CFLAGS_AUTO -O2
|
test "$optimize" = auto && optimize=no
|
||||||
fi
|
fi
|
||||||
test "x$debug" = xyes && CFLAGS_AUTO="-g"
|
|
||||||
|
#
# Decide, from --enable-optimize and the provided CFLAGS, whether a -O
# option is added to CFLAGS_AUTO and which components are later selected
# for -O3.
#
printf "checking for optimization settings... "
case "x$optimize" in
xno|x)
    printf "disabled\n"
    optimize=no
    ;;
xsize|xnone)
    printf "minimize size\n"
    optimize=size
    ;;
xauto)
    # In auto mode, flags that already match a -O option pattern switch
    # optimization handling off; otherwise the defaults are used.
    if fnmatch '-O*|*\ -O*' "$CFLAGS_AUTO $CFLAGS" ; then
        printf "using provided CFLAGS\n"
        optimize=no
    else
        printf "using defaults\n"
        optimize=yes
    fi
    ;;
*)
    # Any other value is kept as a user-supplied component list.
    printf "custom\n"
    ;;
esac

# Unless optimization handling is off, try -Os first and -O2 if that fails.
if test "$optimize" != no ; then
    tryflag CFLAGS_AUTO -Os || tryflag CFLAGS_AUTO -O2
fi

# "yes" stands for the default list of components optimized for speed.
if test "$optimize" = yes ; then
    optimize="internal,malloc,math,string"
fi
|
||||||
|
|
||||||
|
# Turn the comma-separated component list in $optimize into the
# space-separated list of source globs stored in OPTIMIZE_GLOBS.
# Nothing is done when $optimize is "no" or "size".
if fnmatch 'no|size' "$optimize" ; then :
else
    printf "components to be optimized for speed:"
    while test "$optimize" ; do
        # Peel the first comma-separated item off $optimize.
        case "$optimize" in
        *,*) this=${optimize%%,*} optimize=${optimize#*,} ;;
        *) this=$optimize optimize= ;;
        esac
        # Print through %s so that a '%' or '\' in a user-supplied name
        # is not interpreted as part of the printf format.
        printf ' %s' "$this"
        # Normalize the item to a glob that names .c files.
        case "$this" in
        */*.c) ;;                 # already names .c file(s): keep as given
        */*) this=$this*.c ;;     # path prefix: .c files starting with it
        *) this=$this/*.c ;;      # bare name: every .c file below it
        esac
        OPTIMIZE_GLOBS="$OPTIMIZE_GLOBS $this"
    done
    # Drop the leading space left by the first append.
    OPTIMIZE_GLOBS=${OPTIMIZE_GLOBS# }
    printf "\n"
fi
|
||||||
|
|
||||||
|
# Always try -pipe
|
||||||
tryflag CFLAGS_AUTO -pipe
|
tryflag CFLAGS_AUTO -pipe
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -266,14 +315,6 @@ tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables
|
||||||
#
|
#
|
||||||
tryflag CFLAGS_AUTO -Wa,--noexecstack
|
tryflag CFLAGS_AUTO -Wa,--noexecstack
|
||||||
|
|
||||||
#
|
|
||||||
# Some optimization levels add bloated alignment that hurts performance
|
|
||||||
#
|
|
||||||
tryflag CFLAGS_AUTO -falign-functions=1
|
|
||||||
tryflag CFLAGS_AUTO -falign-labels=1
|
|
||||||
tryflag CFLAGS_AUTO -falign-loops=1
|
|
||||||
tryflag CFLAGS_AUTO -falign-jumps=1
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# On x86, make sure we don't have incompatible instruction set
|
# On x86, make sure we don't have incompatible instruction set
|
||||||
# extensions enabled by default. This is bad for making static binaries.
|
# extensions enabled by default. This is bad for making static binaries.
|
||||||
|
@ -366,6 +407,7 @@ CPPFLAGS = $CPPFLAGS
|
||||||
LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
|
LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
|
||||||
CROSS_COMPILE = $CROSS_COMPILE
|
CROSS_COMPILE = $CROSS_COMPILE
|
||||||
LIBCC = $LIBCC
|
LIBCC = $LIBCC
|
||||||
|
OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
|
||||||
EOF
|
EOF
|
||||||
test "x$static" = xno && echo "STATIC_LIBS ="
|
test "x$static" = xno && echo "STATIC_LIBS ="
|
||||||
test "x$shared" = xno && echo "SHARED_LIBS ="
|
test "x$shared" = xno && echo "SHARED_LIBS ="
|
||||||
|
|
Loading…
Reference in New Issue