/*
 * Copyright © 2022 Rémi Denis-Courmont.
 * Loosely based on earlier work copyrighted by Måns Rullgård, 2008.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Floating-point ABI selectors.
 *
 * Depending on the float ABI predefine, each of NOHWF, NOHWD, HWF and HWD
 * expands either to nothing or to a single '#':
 *  - soft-float ABI:   HWF and HWD are '#', NOHWF and NOHWD are empty;
 *  - single-float ABI: NOHWF and HWD are '#', HWF and NOHWD are empty;
 *  - otherwise:        NOHWF and NOHWD are '#', HWF and HWD are empty.
 *
 * NOTE(review): presumably these are meant to be used as line prefixes, with
 * the '#' expansion turning the rest of the line into an assembler comment;
 * confirm against the files that use them.
 */
#if defined (__riscv_float_abi_soft)
#define NOHWF
#define NOHWD
#define HWF   #
#define HWD   #
#elif defined (__riscv_float_abi_single)
#define NOHWF #
#define NOHWD
#define HWF
#define HWD   #
#else
#define NOHWF #
#define NOHWD #
#define HWF
#define HWD
#endif

        /**
         * Enables a list of ISA extensions for the code that follows.
         * The macro consumes its first argument and recurses on the rest,
         * stopping once the first argument is blank.
         * @param ext first extension name (blank terminates the recursion)
         * @param more remaining extension names, if any
         */
        .macro  archadd ext=, more:vararg
        .ifnb   \ext
        .ifc    \ext, b
        # B was defined later, is known to fewer assemblers.
        /* Hence "b" is spelled out as its three components instead. */
        archadd zba, zbb, zbs
        .else
        .option arch, +\ext
        .endif
        archadd \more
        .endif
        .endm

        /**
         * Opens a function definition in the .text section.
         * The symbol is declared global, hidden and of function type.
         * A matching one-shot "endfunc" macro is defined as a side effect;
         * it records the symbol size, restores the assembler options saved
         * here, returns to the previous section and then removes itself.
         * @param sym name of the function symbol
         * @param exts ISA extensions to enable (via archadd) until endfunc
         */
        .macro func sym, exts:vararg
            .text
            /* Saved here, restored by the ".option pop" inside endfunc. */
            .option push
            archadd \exts

            .global \sym
            .hidden \sym
            .type   \sym, %function
            /*
             * Compressed instructions are disabled only around the
             * alignment directive and the label; the inner push/pop pair
             * restores the previous setting for the function body.
             */
            .option push
            .option norvc
            .align  2
            \sym:
            .option pop

            .macro endfunc
                .size   \sym, . - \sym
                .option pop
                .previous
                .purgem endfunc
            .endm
        .endm

        /**
         * Opens a constant data object definition.
         * A matching one-shot "endconst" macro is defined as a side effect;
         * it records the symbol size, pops the section pushed here and then
         * removes itself.
         * @param sym name of the data symbol
         * @param align operand passed to the .align directive (3 by default)
         * @param relocate if non-zero, the object is placed in .data.rel.ro
         *                 rather than in .rodata
         */
        .macro const sym, align=3, relocate=0
            .if \relocate
                .pushsection .data.rel.ro
            .else
                .pushsection .rodata
            .endif
            .align \align
            \sym:

            .macro endconst
                .size  \sym, . - \sym
                .popsection
                .purgem endconst
            .endm
        .endm

/*
 * If the toolchain does not predefine __riscv_zicfilp, provide an "lpad"
 * macro that emits an AUIPC with the zero register as destination and the
 * given label value as immediate.
 */
#if !defined (__riscv_zicfilp)
        .macro  lpad    lpl
        auipc   zero, \lpl
        .endm
#endif

/* RV_V_ELEN: the compiler-provided vector ELEN if there is one, else 64. */
#if defined (__riscv_v_elen)
# define RV_V_ELEN __riscv_v_elen
#else
/* Run-time detection of the V extension implies ELEN >= 64. */
# define RV_V_ELEN 64
#endif
/* VSEW_MAX: vsew code of the widest element (2 if ELEN is 32, otherwise 3). */
#if RV_V_ELEN == 32
# define VSEW_MAX 2
#else
# define VSEW_MAX 3
#endif

        /**
         * Translates symbolic vector type settings into assembler symbols.
         * On return, the assembler symbols "vsew", "tp" and "mp" hold the
         * numeric codes (e8..e64 map to 0..3, tu/ta to 0/1, mu/ma to 0/1).
         * An assembly error is raised for any other spelling.
         * No instruction is emitted.
         * @param ew element width: e8, e16, e32 or e64
         * @param tp tail policy: tu or ta
         * @param mp mask policy: mu or ma
         */
        .macro  parse_vtype ew, tp, mp
        .ifc    \ew,e8
        .equ    vsew, 0
        .else
        .ifc    \ew,e16
        .equ    vsew, 1
        .else
        .ifc    \ew,e32
        .equ    vsew, 2
        .else
        .ifc    \ew,e64
        .equ    vsew, 3
        .else
        .error  "Unknown element width \ew"
        .endif
        .endif
        .endif
        .endif
        .ifc    \tp,tu
        .equ    tp, 0
        .else
        .ifc    \tp,ta
        .equ    tp, 1
        .else
        .error  "Unknown tail policy \tp"
        .endif
        .endif
        .ifc    \mp,mu
        .equ    mp, 0
        .else
        .ifc    \mp,ma
        .equ    mp, 1
        .else
        .error  "Unknown mask policy \mp"
        .endif
        .endif
        .endm

        /**
         * Gets the vector type with the smallest suitable LMUL value.
         * The sequence uses the CLZ and MAX instructions, which are not part
         * of the base ISA; the caller is expected to have enabled a suitable
         * extension beforehand.
         * @param[out] rd vector type destination register
         * @param avl vector length constant (at most 128)
         * @param ew element width: e8, e16, e32 or e64
         * @param tp tail policy: tu or ta
         * @param mp mask policy: mu or ma
         */
        .macro  vtype_ivli rd, avl, ew, tp=tu, mp=mu
        /* log2vl = base-2 logarithm of avl, rounded up, at assembly time. */
        .if     \avl <= 1
        .equ    log2vl, 0
        .elseif \avl <= 2
        .equ    log2vl, 1
        .elseif \avl <= 4
        .equ    log2vl, 2
        .elseif \avl <= 8
        .equ    log2vl, 3
        .elseif \avl <= 16
        .equ    log2vl, 4
        .elseif \avl <= 32
        .equ    log2vl, 5
        .elseif \avl <= 64
        .equ    log2vl, 6
        .elseif \avl <= 128
        .equ    log2vl, 7
        .else
        .error  "Vector length \avl out of range"
        .endif
        parse_vtype \ew, \tp, \mp
        /*
         * rd = clz(vlenb) + log2vl + 1 + VSEW_MAX - XLEN, clamped to zero
         * from below. Given clz(x) = XLEN - 1 - floor(log2(x)) for x != 0,
         * that is log2vl + VSEW_MAX - floor(log2(vlenb)) before clamping.
         */
        csrr    \rd, vlenb
        clz     \rd, \rd
        addi    \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen
        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
        /*
         * For elements narrower than the maximum, lower the value by the
         * width difference and keep only the low 3 bits of the result.
         */
        .if     vsew < VSEW_MAX
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        /* Merge in the vsew (bit 3 up), tail (bit 6) and mask (bit 7) codes. */
        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
        .endm

        /**
         * Gets the vector type with the smallest suitable LMUL value.
         * The sequence uses the CLZ and MAX instructions, which are not part
         * of the base ISA; the caller is expected to have enabled a suitable
         * extension beforehand.
         * @param[out] rd vector type destination register
         * @param rs vector length source register
         * @param[out] tmp temporary register to be clobbered
         * @param ew element width: e8, e16, e32 or e64
         * @param tp tail policy: tu or ta
         * @param mp mask policy: mu or ma
         * @param addend optional addend for the vector length register
         *               (applied, minus one, after the left shift below)
         */
        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
        parse_vtype \ew, \tp, \mp
        /*
         * The difference between the CLZ's notionally equals the VLMUL value
         * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
         */
        /* rs is read here, before rd is first written by the CSR read. */
        slli    \tmp, \rs, 1 + VSEW_MAX
        /* The addition is omitted when it would add zero (addend == 1). */
        .if     \addend - 1
        addi    \tmp, \tmp, \addend - 1
        .endif
        csrr    \rd, vlenb
        clz     \tmp, \tmp
        clz     \rd, \rd
        sub     \rd, \rd, \tmp
        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
        /*
         * For elements narrower than the maximum, lower the value by the
         * width difference and keep only the low 3 bits of the result.
         */
        .if     vsew < VSEW_MAX
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        /* Merge in the vsew (bit 3 up), tail (bit 6) and mask (bit 7) codes. */
        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
        .endm

        /**
         * Widens a vector type.
         * The immediate 011 is octal (binary 001001), so a single addition
         * adds n to both the field starting at bit 0 and the one starting at
         * bit 3. Bit 2 is inverted before the addition and again after it.
         * NOTE(review): the inversion presumably accounts for the low field
         * being a signed 3-bit quantity; confirm against the vtype encoding.
         * @param[out] rd widened vector type destination register
         * @param rs vector type source register
         * @param n number of times to widen (once by default)
         */
        .macro  vwtypei rd, rs, n=1
        xori    \rd, \rs, 4
        addi    \rd, \rd, (\n) * 011
        xori    \rd, \rd, 4
        .endm

        /**
         * Narrows a vector type.
         * Implemented as a widening by the negated count.
         * @param[out] rd narrowed vector type destination register
         * @param rs vector type source register
         * @param n number of times to narrow (once by default)
         */
        .macro  vntypei rd, rs, n=1
        vwtypei \rd, \rs, -(\n)
        .endm