mirror of https://git.ffmpeg.org/ffmpeg.git
244 lines
6.7 KiB
ArmAsm
244 lines
6.7 KiB
ArmAsm
/*
 * Copyright © 2022 Rémi Denis-Courmont.
 * Loosely based on earlier work copyrighted by Måns Rullgård, 2008.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Floating-point ABI selectors.
 *
 * Each of the four macros expands either to nothing or to a lone # character,
 * which the RISC-V GNU assembler treats as the start of a comment. They are
 * presumably meant to prefix a source line so that the line is kept or
 * dropped depending on the ABI (their users are outside this file):
 *
 *   HWF   / NOHWF - line is kept when float registers are / are not
 *                   used by the calling convention
 *   HWD   / NOHWD - same, for double-precision registers
 */
#if defined(__riscv_float_abi_soft)
# define HWF   #
# define NOHWF
#else
# define HWF
# define NOHWF #
#endif

/* Both the soft and the single-precision ABIs lack double registers. */
#if defined(__riscv_float_abi_soft) || defined(__riscv_float_abi_single)
# define HWD   #
# define NOHWD
#else
# define HWD
# define NOHWD #
#endif

/*
 * func sym, ext1=, ext2=
 *
 * Opens the definition of function sym in the .text section, aligned on a
 * 4-byte boundary. The current assembler options are pushed and up to two
 * ISA extensions are enabled for the body (ext2 is only considered when
 * ext1 is given). The symbol is exported with hidden visibility.
 *
 * A one-shot endfunc macro is defined as a side effect. It records the size
 * of the symbol, pops the assembler options, returns to the previous
 * section, and then purges itself so that the next func can define it anew.
 */
.macro  func sym, ext1=, ext2=
        .text
        .p2align 2

        .option push
        .ifnb \ext1
        .option arch, +\ext1
        .ifnb \ext2
        .option arch, +\ext2
        .endif
        .endif

        .hidden \sym
        .globl  \sym
        .type   \sym, %function
\sym:

        .macro  endfunc
            .size   \sym, . - \sym
            .option pop
            .previous
            .purgem endfunc
        .endm
.endm

/*
 * const sym, align=3, relocate=0
 *
 * Opens the definition of data object sym, aligned on a 2^align byte
 * boundary. The object goes into .rodata, or into .data.rel.ro when
 * relocate is non-zero. The current section is saved on the section stack.
 *
 * A one-shot endconst macro is defined as a side effect. It records the
 * size of the symbol, pops the section stack, and purges itself.
 */
.macro  const sym, align=3, relocate=0
        .ifeq \relocate
        .pushsection .rodata
        .else
        .pushsection .data.rel.ro
        .endif
        .p2align \align
\sym:

        .macro  endconst
            .size   \sym, . - \sym
            .popsection
            .purgem endconst
        .endm
.endm

#ifndef __riscv_zba
/*
 * SH{1,2,3}ADD definitions for assemblers that are not targeting Zba.
 *
 * The instructions are emitted as raw R-type encodings: major opcode OP,
 * funct7 = 0x10 and funct3 = 2 * n, where n is the shift amount.
 */
.macro  shnadd n, rd, rs1, rs2
        .insn   r OP, \n * 2, 0x10, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 1) + rs2 */
.macro  sh1add rd, rs1, rs2
        shnadd  1, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 2) + rs2 */
.macro  sh2add rd, rs1, rs2
        shnadd  2, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 3) + rs2 */
.macro  sh3add rd, rs1, rs2
        shnadd  3, \rd, \rs1, \rs2
.endm
#endif

/*
 * RV_V_ELEN: largest vector element width in bits.
 * Taken from the compiler when the V extension is enabled at build time.
 */
#ifdef __riscv_v_elen
# define RV_V_ELEN __riscv_v_elen
#else
/* Run-time detection of the V extension implies ELEN >= 64. */
# define RV_V_ELEN 64
#endif

/*
 * VSEW_MAX: vsew encoding of the largest element width,
 * i.e. 2 for 32-bit elements and 3 for 64-bit elements.
 */
#if RV_V_ELEN != 32
# define VSEW_MAX 3
#else
# define VSEW_MAX 2
#endif

/*
 * parse_vtype ew, tp, mp
 *
 * Translates symbolic vector type settings into the assembler symbols
 * vsew, tp and mp, for use in later assembly-time expressions:
 *   ew: e8, e16, e32 or e64 -> vsew = 0, 1, 2 or 3
 *   tp: tu or ta            -> tp   = 0 or 1
 *   mp: mu or ma            -> mp   = 0 or 1
 * An unrecognised value raises an assembly error and leaves the
 * corresponding symbol untouched. No instruction is emitted.
 */
.macro  parse_vtype ew, tp, mp
        /* Element width: at most one of the four tests can match. */
        .ifc \ew,e8
        .set    vsew, 0
        .endif
        .ifc \ew,e16
        .set    vsew, 1
        .endif
        .ifc \ew,e32
        .set    vsew, 2
        .endif
        .ifc \ew,e64
        .set    vsew, 3
        .endif
        .ifnc \ew,e8
        .ifnc \ew,e16
        .ifnc \ew,e32
        .ifnc \ew,e64
        .error "Unknown element width \ew"
        .endif
        .endif
        .endif
        .endif

        /* Tail policy */
        .ifc \tp,ta
        .set    tp, 1
        .else
        .ifc \tp,tu
        .set    tp, 0
        .else
        .error "Unknown tail policy \tp"
        .endif
        .endif

        /* Mask policy */
        .ifc \mp,ma
        .set    mp, 1
        .else
        .ifc \mp,mu
        .set    mp, 0
        .else
        .error "Unknown mask policy \mp"
        .endif
        .endif
.endm

/**
 * Gets the vector type with the smallest suitable LMUL value.
 * Uses the CLZ and MAX instructions, and sets the assembler symbols
 * log2vl, vsew, tp and mp as a side effect.
 * @param[out] rd vector type destination register
 * @param avl vector length constant (at most 128)
 * @param ew element width: e8, e16, e32 or e64
 * @param tp tail policy: tu or ta
 * @param mp mask policy: mu or ma
 */
.macro  vtype_ivli rd, avl, ew, tp=tu, mp=mu
        /* log2vl = base-2 logarithm of the vector length, rounded up. */
        .if \avl > 128
        .error "Vector length \avl out of range"
        .elseif \avl > 64
        .set    log2vl, 7
        .elseif \avl > 32
        .set    log2vl, 6
        .elseif \avl > 16
        .set    log2vl, 5
        .elseif \avl > 8
        .set    log2vl, 4
        .elseif \avl > 4
        .set    log2vl, 3
        .elseif \avl > 2
        .set    log2vl, 2
        .elseif \avl > 1
        .set    log2vl, 1
        .else
        .set    log2vl, 0
        .endif
        parse_vtype \ew, \tp, \mp
        /*
         * CLZ(vlenb) is XLEN - 1 - log2(vlenb), so the sum below comes out
         * as log2vl + VSEW_MAX - log2(vlenb): the base-2 logarithm of the
         * LMUL needed to hold the vector with elements of the largest width.
         */
        csrr    \rd, vlenb
        clz     \rd, \rd
        addi    \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen
        /* VLMUL must be >= VSEW - VSEW_MAX */
        max     \rd, \rd, zero
        .if VSEW_MAX > vsew
        /* Narrower elements need less; wrap negative values to 3 bits. */
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        ori     \rd, \rd, (mp << 7) | (tp << 6) | (vsew << 3)
.endm

/**
 * Gets the vector type with the smallest suitable LMUL value.
 * Uses the CLZ and MAX instructions, and sets the assembler symbols
 * vsew, tp and mp as a side effect.
 * @param[out] rd vector type destination register (must differ from tmp)
 * @param rs vector length source register
 * @param[out] tmp temporary register to be clobbered
 * @param ew element width: e8, e16, e32 or e64
 * @param tp tail policy: tu or ta
 * @param mp mask policy: mu or ma
 * @param addend optional constant added to the vector length register,
 *               counted in elements (must fit an ADDI immediate)
 */
.macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
        parse_vtype \ew, \tp, \mp
        /*
         * Let vl = rs + addend. The difference between the CLZ of vlenb and
         * the CLZ of (vl - 1) notionally equals the VLMUL value for 4-bit
         * elements. But we want the value for SEW_MAX-bit elements, hence
         * tmp = (vl << (1 + VSEW_MAX)) - 1.
         *
         * The addend has to be folded in before the shift so that it counts
         * elements. Applying it to the shifted value is only exact for an
         * addend of 0 or 1 and can otherwise select too small an LMUL.
         */
        .if (\addend) - 1
        .if \addend
        addi    \tmp, \rs, \addend
        slli    \tmp, \tmp, 1 + VSEW_MAX
        .else
        slli    \tmp, \rs, 1 + VSEW_MAX
        .endif
        addi    \tmp, \tmp, -1
        .else
        /*
         * Addend of 1: the shifted rs already has the same leading zero
         * count as ((rs + 1) << (1 + VSEW_MAX)) - 1 for any non-zero rs,
         * and a zero rs ends up clamped by MAX below.
         */
        slli    \tmp, \rs, 1 + VSEW_MAX
        .endif
        csrr    \rd, vlenb
        clz     \tmp, \tmp
        clz     \rd, \rd
        sub     \rd, \rd, \tmp
        /* VLMUL must be >= VSEW - VSEW_MAX */
        max     \rd, \rd, zero
        .if vsew < VSEW_MAX
        /* Narrower elements need less; wrap negative values to 3 bits. */
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
.endm

/**
 * Widens a vector type.
 * Both the element width and the length multiplier are stepped up, so
 * that the number of elements per register group is unchanged.
 * @param[out] rd widened vector type destination register
 * @param rs vector type source register
 * @param n number of times to widen (once by default)
 */
.macro  vwtypei rd, rs, n=1
        /*
         * VLMUL is a signed 3-bit field below VSEW. Flipping its top bit
         * turns it into a biased unsigned value, so that stepping from a
         * fractional to an integral LMUL does not carry into VSEW.
         */
        xori    \rd, \rs, 1 << 2
        /* Step VSEW (bit 3 and up) and VLMUL (bit 0 and up) together. */
        addi    \rd, \rd, (\n) * ((1 << 3) | 1)
        xori    \rd, \rd, 1 << 2
.endm

/**
 * Narrows a vector type.
 * This is the widening sequence with the step count negated.
 * @param[out] rd narrowed vector type destination register
 * @param rs vector type source register
 * @param n number of times to narrow (once by default)
 */
.macro  vntypei rd, rs, n=1
        /* Bias the signed VLMUL field so that it cannot borrow from VSEW. */
        xori    \rd, \rs, 4
        /* Octal 011 steps VSEW and VLMUL by one each. */
        addi    \rd, \rd, (-(\n)) * 011
        xori    \rd, \rd, 4
.endm