mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-14 11:21:29 +00:00
configure: aarch64: Support assembling the dotprod and i8mm arch extensions
These are available since ARMv8.4-a and ARMv8.6-a respectively, but can also be available optionally since ARMv8.2-a. Check if ".arch armv8.2-a" and ".arch_extension {dotprod,i8mm}" are supported, and check if the instructions can be assembled. Current clang versions fail to support the dotprod and i8mm features in the .arch_extension directive, but do support them if enabled with -march=armv8.4-a on the command line. (Curiously, lowering the arch level with ".arch armv8.2-a" doesn't make the extensions unavailable if they were enabled with -march; if that changes, Clang should also learn to support these extensions via .arch_extension for them to remain usable here.) Signed-off-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
parent
fa11c4c7fa
commit
fb1b88af77
81
configure
vendored
81
configure
vendored
@ -454,6 +454,8 @@ Optimization options (experts only):
|
|||||||
--disable-armv6t2 disable armv6t2 optimizations
|
--disable-armv6t2 disable armv6t2 optimizations
|
||||||
--disable-vfp disable VFP optimizations
|
--disable-vfp disable VFP optimizations
|
||||||
--disable-neon disable NEON optimizations
|
--disable-neon disable NEON optimizations
|
||||||
|
--disable-dotprod disable DOTPROD optimizations
|
||||||
|
--disable-i8mm disable I8MM optimizations
|
||||||
--disable-inline-asm disable use of inline assembly
|
--disable-inline-asm disable use of inline assembly
|
||||||
--disable-x86asm disable use of standalone x86 assembly
|
--disable-x86asm disable use of standalone x86 assembly
|
||||||
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
|
--disable-mipsdsp disable MIPS DSP ASE R1 optimizations
|
||||||
@ -1154,6 +1156,43 @@ check_insn(){
|
|||||||
check_as ${1}_external "$2"
|
check_as ${1}_external "$2"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# check_arch_level <level>
# Probe, via check_as, whether ".arch <level>" is accepted. If it is, record
# <level> in as_arch_level; otherwise as_arch_level keeps its previous value.
check_arch_level(){
|
||||||
|
log check_arch_level "$@"
|
||||||
|
level="$1"
|
||||||
|
# The probe result lands in the tested_arch_level flag, which is read on the
# next statement to decide whether to raise as_arch_level.
check_as tested_arch_level ".arch $level"
|
||||||
|
enabled tested_arch_level && as_arch_level="$level"
|
||||||
|
}
|
||||||
|
|
||||||
|
# check_archext_insn <feature> <instruction>
# Probe an optional architecture extension:
#  - ${feature}_inline is checked with check_inline_asm,
#  - as_archext_${feature}_directive is enabled if the ".arch_extension
#    <feature>" directive passes test_as,
#  - <feature> itself is first disabled and then re-enabled only if
#    <instruction> passes test_as after the optional ".arch" and
#    ".arch_extension" directives.
check_archext_insn(){
|
||||||
|
log check_archext_insn "$@"
|
||||||
|
feature="$1"
|
||||||
|
instr="$2"
|
||||||
|
# Check if the assembly is accepted in inline assembly.
|
||||||
|
check_inline_asm ${feature}_inline "\"$instr\""
|
||||||
|
# We don't check if the instruction is supported out of the box by the
|
||||||
|
# external assembler (we don't try to set ${feature}_external) as we don't
|
||||||
|
# need to use these instructions in non-runtime detected codepaths.
|
||||||
|
|
||||||
|
# Start from "disabled"; the flag is only turned back on by the final
# assembly test at the end of this function.
disable $feature
|
||||||
|
|
||||||
|
# Only emit a ".arch" line when as_arch_directive is enabled; the level used
# is whatever as_arch_level currently holds.
enabled as_arch_directive && arch_directive=".arch $as_arch_level" || arch_directive=""
|
||||||
|
|
||||||
|
# Test if the assembler supports the .arch_extension $feature directive.
|
||||||
|
# If the test below fails, the directive string is cleared so that the
# instruction test further down is run without it.
arch_extension_directive=".arch_extension $feature"
|
||||||
|
test_as <<EOF && enable as_archext_${feature}_directive || arch_extension_directive=""
|
||||||
|
$arch_directive
|
||||||
|
$arch_extension_directive
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Test if we can assemble the instruction after potential .arch and
|
||||||
|
# .arch_extension directives.
|
||||||
|
test_as <<EOF && enable ${feature}
|
||||||
|
$arch_directive
|
||||||
|
$arch_extension_directive
|
||||||
|
$instr
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
check_x86asm(){
|
check_x86asm(){
|
||||||
log check_x86asm "$@"
|
log check_x86asm "$@"
|
||||||
name=$1
|
name=$1
|
||||||
@ -2059,6 +2098,8 @@ ARCH_EXT_LIST_ARM="
|
|||||||
armv6
|
armv6
|
||||||
armv6t2
|
armv6t2
|
||||||
armv8
|
armv8
|
||||||
|
dotprod
|
||||||
|
i8mm
|
||||||
neon
|
neon
|
||||||
vfp
|
vfp
|
||||||
vfpv3
|
vfpv3
|
||||||
@ -2322,6 +2363,8 @@ SYSTEM_LIBRARIES="
|
|||||||
|
|
||||||
TOOLCHAIN_FEATURES="
|
TOOLCHAIN_FEATURES="
|
||||||
as_arch_directive
|
as_arch_directive
|
||||||
|
as_archext_dotprod_directive
|
||||||
|
as_archext_i8mm_directive
|
||||||
as_dn_directive
|
as_dn_directive
|
||||||
as_fpu_directive
|
as_fpu_directive
|
||||||
as_func
|
as_func
|
||||||
@ -2622,6 +2665,8 @@ intrinsics_neon_deps="neon"
|
|||||||
vfp_deps_any="aarch64 arm"
|
vfp_deps_any="aarch64 arm"
|
||||||
vfpv3_deps="vfp"
|
vfpv3_deps="vfp"
|
||||||
setend_deps="arm"
|
setend_deps="arm"
|
||||||
|
dotprod_deps="aarch64 neon"
|
||||||
|
i8mm_deps="aarch64 neon"
|
||||||
|
|
||||||
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
|
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
|
||||||
|
|
||||||
@ -5988,12 +6033,27 @@ check_inline_asm inline_asm_labels '"1:\n"'
|
|||||||
check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
|
check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
|
||||||
|
|
||||||
if enabled aarch64; then
|
if enabled aarch64; then
|
||||||
|
as_arch_level="armv8-a"
|
||||||
|
check_as as_arch_directive ".arch $as_arch_level"
|
||||||
|
enabled as_arch_directive && check_arch_level armv8.2-a
|
||||||
|
|
||||||
enabled armv8 && check_insn armv8 'prfm pldl1strm, [x0]'
|
enabled armv8 && check_insn armv8 'prfm pldl1strm, [x0]'
|
||||||
# internal assembler in clang 3.3 does not support this instruction
|
# internal assembler in clang 3.3 does not support this instruction
|
||||||
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
|
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
|
||||||
enabled vfp && check_insn vfp 'fmadd d0, d0, d1, d2'
|
enabled vfp && check_insn vfp 'fmadd d0, d0, d1, d2'
|
||||||
|
|
||||||
map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
|
archext_list="dotprod i8mm"
|
||||||
|
enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
|
||||||
|
enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
|
||||||
|
|
||||||
|
# Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
|
||||||
|
# assembly support the feature out of the box. Skip this for the features
|
||||||
|
# checked with check_archext_insn above, as that function takes care of
|
||||||
|
# updating all the variables as necessary.
|
||||||
|
for v in $ARCH_EXT_LIST_ARM; do
|
||||||
|
is_in $v $archext_list && continue
|
||||||
|
enabled_any ${v}_external ${v}_inline || disable $v
|
||||||
|
done
|
||||||
|
|
||||||
elif enabled alpha; then
|
elif enabled alpha; then
|
||||||
|
|
||||||
@ -6022,6 +6082,12 @@ EOF
|
|||||||
warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
|
warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Test for various instruction sets, testing support both in inline and
|
||||||
|
# external assembly. This sets the ${v}_inline or ${v}_external flags
|
||||||
|
# if the instruction can be used unconditionally in either inline or
|
||||||
|
# external assembly. This means that if the ${v}_external feature is set,
|
||||||
|
# that feature can be used unconditionally in various support macros
|
||||||
|
# anywhere in external assembly, in any function.
|
||||||
enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
|
enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
|
||||||
enabled armv6 && check_insn armv6 'sadd16 r0, r0, r0'
|
enabled armv6 && check_insn armv6 'sadd16 r0, r0, r0'
|
||||||
enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
|
enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
|
||||||
@ -6030,6 +6096,14 @@ EOF
|
|||||||
enabled vfpv3 && check_insn vfpv3 'vmov.f32 s0, #1.0'
|
enabled vfpv3 && check_insn vfpv3 'vmov.f32 s0, #1.0'
|
||||||
enabled setend && check_insn setend 'setend be'
|
enabled setend && check_insn setend 'setend be'
|
||||||
|
|
||||||
|
# If neither inline nor external assembly can use the feature by default,
|
||||||
|
# disable the main unsuffixed feature (e.g. HAVE_NEON).
|
||||||
|
#
|
||||||
|
# For targets that support runtime CPU feature detection, don't disable
|
||||||
|
# the main feature flag - there we assume that all supported toolchains
|
||||||
|
# can assemble code for all instruction set features (e.g. NEON) with
|
||||||
|
# suitable assembly flags (such as ".fpu neon"); we don't check
|
||||||
|
# specifically that they really do.
|
||||||
[ $target_os = linux ] || [ $target_os = android ] ||
|
[ $target_os = linux ] || [ $target_os = android ] ||
|
||||||
map 'enabled_any ${v}_external ${v}_inline || disable $v' \
|
map 'enabled_any ${v}_external ${v}_inline || disable $v' \
|
||||||
$ARCH_EXT_LIST_ARM
|
$ARCH_EXT_LIST_ARM
|
||||||
@ -7610,6 +7684,8 @@ fi
|
|||||||
if enabled aarch64; then
|
if enabled aarch64; then
|
||||||
echo "NEON enabled ${neon-no}"
|
echo "NEON enabled ${neon-no}"
|
||||||
echo "VFP enabled ${vfp-no}"
|
echo "VFP enabled ${vfp-no}"
|
||||||
|
echo "DOTPROD enabled ${dotprod-no}"
|
||||||
|
echo "I8MM enabled ${i8mm-no}"
|
||||||
fi
|
fi
|
||||||
if enabled arm; then
|
if enabled arm; then
|
||||||
echo "ARMv5TE enabled ${armv5te-no}"
|
echo "ARMv5TE enabled ${armv5te-no}"
|
||||||
@ -7900,6 +7976,9 @@ test -n "$assert_level" &&
|
|||||||
test -n "$malloc_prefix" &&
|
test -n "$malloc_prefix" &&
|
||||||
echo "#define MALLOC_PREFIX $malloc_prefix" >>$TMPH
|
echo "#define MALLOC_PREFIX $malloc_prefix" >>$TMPH
|
||||||
|
|
||||||
|
enabled aarch64 &&
|
||||||
|
echo "#define AS_ARCH_LEVEL $as_arch_level" >>$TMPH
|
||||||
|
|
||||||
if enabled x86asm; then
|
if enabled x86asm; then
|
||||||
append config_files $TMPASM
|
append config_files $TMPASM
|
||||||
cat > $TMPASM <<EOF
|
cat > $TMPASM <<EOF
|
||||||
|
@ -36,6 +36,17 @@
|
|||||||
# define __has_feature(x) 0
|
# define __has_feature(x) 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Set the architecture level first. AS_ARCH_LEVEL is written to the config
 * header by configure from the level it recorded in as_arch_level. */
#if HAVE_AS_ARCH_DIRECTIVE
|
||||||
|
.arch AS_ARCH_LEVEL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Enable the optional extensions after the .arch line, in the same order
 * configure used when it tested the directives. Each guard presumably
 * mirrors the matching as_archext_*_directive toolchain feature from
 * configure (NOTE(review): confirm the HAVE_ macro generation). */
#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
|
||||||
|
.arch_extension dotprod
|
||||||
|
#endif
|
||||||
|
#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
|
||||||
|
.arch_extension i8mm
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Support macros for
|
/* Support macros for
|
||||||
* - Armv8.3-A Pointer Authentication and
|
* - Armv8.3-A Pointer Authentication and
|
||||||
|
Loading…
Reference in New Issue
Block a user