/*
 * Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/riscv/asm.S"

func ff_float_to_fixed24_rvv, zve32f
        li            t1, 1 << 24
        fcvt.s.w      f0, t1
1:
        vsetvli       t0, a2, e32, m8, ta, ma
        sub           a2, a2, t0
        vle32.v       v0, (a1)
        vfmul.vf      v0, v0, f0
        vfcvt.x.f.v   v0, v0
        sh2add        a1, t0, a1
        vse32.v       v0, (a0)
        sh2add        a0, t0, a0
        bnez          a2, 1b

        ret
endfunc

#if __riscv_xlen >= 64
func ff_sum_square_butterfly_int32_rvv, zve64x
        vsetvli    t0, zero, e64, m8, ta, ma
        vmv.v.x    v0, zero
        vmv.v.x    v8, zero
1:
        vsetvli    t0, a3, e32, m2, tu, ma
        vle32.v    v16, (a1)
        sub        a3, a3, t0
        vle32.v    v20, (a2)
        sh2add     a1, t0, a1
        vadd.vv    v24, v16, v20
        sh2add     a2, t0, a2
        vsub.vv    v28, v16, v20
        vwmacc.vv  v0, v16, v16
        vwmacc.vv  v4, v20, v20
        vwmacc.vv  v8, v24, v24
        vwmacc.vv  v12, v28, v28
        bnez       a3, 1b

        vsetvli    t0, zero, e64, m4, ta, ma
        vmv.s.x    v16, zero
        vmv.s.x    v17, zero
        vredsum.vs v16, v0, v16
        vmv.s.x    v18, zero
        vredsum.vs v17, v4, v17
        vmv.s.x    v19, zero
        vredsum.vs v18, v8, v18
        vmv.x.s    t0, v16
        vredsum.vs v19, v12, v19
        vmv.x.s    t1, v17
        sd         t0,   (a0)
        vmv.x.s    t2, v18
        sd         t1,  8(a0)
        vmv.x.s    t3, v19
        sd         t2, 16(a0)
        sd         t3, 24(a0)
        ret
endfunc
#endif