swscale: [LA] Optimize swscale funcs in input.c

Optimized 7 funcs with LSX and LASX:
1. yuy2ToUV_c
2. yvy2ToUV_c
3. uyvyToUV_c
4. nv12ToUV_c
5. nv21ToUV_c
6. abgrToA_c
7. rgbaToA_c

Reviewed-by: colleague of Shiyou Yin
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Shiyou Yin 2024-03-16 11:03:33 +08:00 committed by Michael Niedermayer
parent 8b76df9142
commit 2a7d622ddd
No known key found for this signature in database
GPG Key ID: B18E8928B3948D64
6 changed files with 652 additions and 18 deletions

View File

@ -9,4 +9,5 @@ LSX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale.o \
loongarch/input.o \
loongarch/output.o \
loongarch/output_lsx.o \
loongarch/input_lsx.o \
loongarch/yuv2rgb_lsx.o

View File

@ -283,3 +283,498 @@ function planar_rgb_to_uv_lsx
ld.d s3, sp, 16
addi.d sp, sp, 24
endfunc
/*
 * void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
 *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
 *
 * Extracts the chroma bytes of packed 4-byte groups. For every i in [0, width):
 *     dstU[i] = src1[4 * i + 1]
 *     dstV[i] = src1[4 * i + 3]
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, vr0-vr2 (f0/f1 alias the low 64 bits of vr0/vr1)
 *
 * The vector loop converts 8 groups (32 source bytes) per iteration and loads
 * exactly bytes [0, 32) of the current groups, so it never touches memory past
 * the data it converts. The scalar loop handles the remaining width % 8 groups.
 */
function yuy2ToUV_lsx
    andi            t0,     a5,     7           // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     3           // a5 = number of 8-group vector iterations
    beqz            a5,     2f
1:
    vld             vr0,    a3,     0
    vld             vr1,    a3,     16
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     32
    vpickod.b       vr2,    vr1,    vr0         // odd source bytes: U0 V0 U1 V1 ... U7 V7
    vpickev.b       vr0,    vr2,    vr2         // U0..U7 in the low 64 bits
    vpickod.b       vr1,    vr2,    vr2         // V0..V7 in the low 64 bits
    fst.d           f0,     a0,     0           // 8 bytes -> dstU
    fst.d           f1,     a1,     0           // 8 bytes -> dstV
    addi.d          a0,     a0,     8
    addi.d          a1,     a1,     8
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     1           // U of this group
    ld.b            t2,     a3,     3           // V of this group
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a0,     0
    st.b            t2,     a1,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * LASX version of yuy2ToUV (same arguments as yuy2ToUV_lsx):
 *     dstU[i] = src1[4 * i + 1], dstV[i] = src1[4 * i + 3] for i in [0, width)
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, xr0-xr2
 *
 * The vector loop converts 16 groups (64 source bytes) per iteration and loads
 * exactly bytes [0, 64) of the current groups (no read past the converted data).
 * xvpickev.b/xvpickod.b work inside each 128-bit half, so every pick is followed
 * by xvpermi.d 0xd8 (dword order 0,2,1,3) to put the result back in linear order.
 */
function yuy2ToUV_lasx
    andi            t0,     a5,     15          // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     4           // a5 = number of 16-group vector iterations
    beqz            a5,     2f
1:
    xvld            xr0,    a3,     0
    xvld            xr1,    a3,     32
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     64
    xvpickod.b      xr2,    xr1,    xr0         // odd source bytes (U/V interleaved)
    xvpermi.d       xr2,    xr2,    0xd8        // linear order: U0 V0 ... U15 V15
    xvpickev.b      xr0,    xr2,    xr2
    xvpermi.d       xr0,    xr0,    0xd8        // low 128 bits = U0..U15
    xvpickod.b      xr1,    xr2,    xr2
    xvpermi.d       xr1,    xr1,    0xd8        // low 128 bits = V0..V15
    vst             vr0,    a0,     0           // 16 bytes -> dstU
    vst             vr1,    a1,     0           // 16 bytes -> dstV
    addi.d          a0,     a0,     16
    addi.d          a1,     a1,     16
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     1           // U of this group
    ld.b            t2,     a3,     3           // V of this group
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a0,     0
    st.b            t2,     a1,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
 *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
 *
 * Same byte extraction as yuy2ToUV, with the two destinations swapped.
 * For every i in [0, width):
 *     dstV[i] = src1[4 * i + 1]
 *     dstU[i] = src1[4 * i + 3]
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, vr0-vr2 (f0/f1 alias the low 64 bits of vr0/vr1)
 *
 * The vector loop converts 8 groups (32 source bytes) per iteration and loads
 * exactly bytes [0, 32) of the current groups, so it never touches memory past
 * the data it converts. The scalar loop handles the remaining width % 8 groups.
 */
function yvy2ToUV_lsx
    andi            t0,     a5,     7           // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     3           // a5 = number of 8-group vector iterations
    beqz            a5,     2f
1:
    vld             vr0,    a3,     0
    vld             vr1,    a3,     16
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     32
    vpickod.b       vr2,    vr1,    vr0         // odd source bytes: V0 U0 V1 U1 ... V7 U7
    vpickev.b       vr0,    vr2,    vr2         // V0..V7 in the low 64 bits
    vpickod.b       vr1,    vr2,    vr2         // U0..U7 in the low 64 bits
    fst.d           f0,     a1,     0           // 8 bytes -> dstV
    fst.d           f1,     a0,     0           // 8 bytes -> dstU
    addi.d          a0,     a0,     8
    addi.d          a1,     a1,     8
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     1           // V of this group
    ld.b            t2,     a3,     3           // U of this group
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a1,     0
    st.b            t2,     a0,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * LASX version of yvy2ToUV (same arguments as yvy2ToUV_lsx):
 *     dstV[i] = src1[4 * i + 1], dstU[i] = src1[4 * i + 3] for i in [0, width)
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, xr0-xr2
 *
 * The vector loop converts 16 groups (64 source bytes) per iteration and loads
 * exactly bytes [0, 64) of the current groups (no read past the converted data).
 * xvpickev.b/xvpickod.b work inside each 128-bit half, so every pick is followed
 * by xvpermi.d 0xd8 (dword order 0,2,1,3) to put the result back in linear order.
 */
function yvy2ToUV_lasx
    andi            t0,     a5,     15          // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     4           // a5 = number of 16-group vector iterations
    beqz            a5,     2f
1:
    xvld            xr0,    a3,     0
    xvld            xr1,    a3,     32
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     64
    xvpickod.b      xr2,    xr1,    xr0         // odd source bytes (V/U interleaved)
    xvpermi.d       xr2,    xr2,    0xd8        // linear order: V0 U0 ... V15 U15
    xvpickev.b      xr0,    xr2,    xr2
    xvpermi.d       xr0,    xr0,    0xd8        // low 128 bits = V0..V15
    xvpickod.b      xr1,    xr2,    xr2
    xvpermi.d       xr1,    xr1,    0xd8        // low 128 bits = U0..U15
    vst             vr0,    a1,     0           // 16 bytes -> dstV
    vst             vr1,    a0,     0           // 16 bytes -> dstU
    addi.d          a0,     a0,     16
    addi.d          a1,     a1,     16
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     1           // V of this group
    ld.b            t2,     a3,     3           // U of this group
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a1,     0
    st.b            t2,     a0,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
 *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
 *
 * Extracts the chroma bytes of packed 4-byte groups. For every i in [0, width):
 *     dstU[i] = src1[4 * i + 0]
 *     dstV[i] = src1[4 * i + 2]
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, vr0-vr2 (f0/f1 alias the low 64 bits of vr0/vr1)
 *
 * The vector loop converts 8 groups (32 source bytes) per iteration; the scalar
 * loop handles the remaining width % 8 groups and must read the same byte
 * positions (0 and 2) as the vector loop.
 */
function uyvyToUV_lsx
    andi            t0,     a5,     7           // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     3           // a5 = number of 8-group vector iterations
    beqz            a5,     2f
1:
    vld             vr0,    a3,     0
    vld             vr1,    a3,     16
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     32
    vpickev.b       vr2,    vr1,    vr0         // even source bytes: U0 V0 U1 V1 ... U7 V7
    vpickev.b       vr0,    vr2,    vr2         // U0..U7 in the low 64 bits
    vpickod.b       vr1,    vr2,    vr2         // V0..V7 in the low 64 bits
    fst.d           f0,     a0,     0           // 8 bytes -> dstU
    fst.d           f1,     a1,     0           // 8 bytes -> dstV
    addi.d          a0,     a0,     8
    addi.d          a1,     a1,     8
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     0           // U is byte 0 of the group (not byte 1)
    ld.b            t2,     a3,     2           // V is byte 2 of the group (not byte 3)
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a0,     0
    st.b            t2,     a1,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * LASX version of uyvyToUV (same arguments as uyvyToUV_lsx):
 *     dstU[i] = src1[4 * i + 0], dstV[i] = src1[4 * i + 2] for i in [0, width)
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, xr0-xr2
 *
 * The vector loop converts 16 groups (64 source bytes) per iteration; the scalar
 * loop handles the remaining width % 16 groups and must read the same byte
 * positions (0 and 2) as the vector loop.
 * xvpickev.b/xvpickod.b work inside each 128-bit half, so every pick is followed
 * by xvpermi.d 0xd8 (dword order 0,2,1,3) to put the result back in linear order.
 */
function uyvyToUV_lasx
    andi            t0,     a5,     15          // t0 = groups left for the scalar loop
    srli.d          a5,     a5,     4           // a5 = number of 16-group vector iterations
    beqz            a5,     2f
1:
    xvld            xr0,    a3,     0
    xvld            xr1,    a3,     32
    addi.d          a5,     a5,     -1
    addi.d          a3,     a3,     64
    xvpickev.b      xr2,    xr1,    xr0         // even source bytes (U/V interleaved)
    xvpermi.d       xr2,    xr2,    0xd8        // linear order: U0 V0 ... U15 V15
    xvpickev.b      xr0,    xr2,    xr2
    xvpermi.d       xr0,    xr0,    0xd8        // low 128 bits = U0..U15
    xvpickod.b      xr1,    xr2,    xr2
    xvpermi.d       xr1,    xr1,    0xd8        // low 128 bits = V0..V15
    vst             vr0,    a0,     0           // 16 bytes -> dstU
    vst             vr1,    a1,     0           // 16 bytes -> dstV
    addi.d          a0,     a0,     16
    addi.d          a1,     a1,     16
    bnez            a5,     1b
2:
    beqz            t0,     4f
3:
    ld.b            t1,     a3,     0           // U is byte 0 of the group (not byte 1)
    ld.b            t2,     a3,     2           // V is byte 2 of the group (not byte 3)
    addi.d          a3,     a3,     4
    addi.d          t0,     t0,     -1
    st.b            t1,     a0,     0
    st.b            t2,     a1,     0
    addi.d          a0,     a0,     1
    addi.d          a1,     a1,     1
    bnez            t0,     3b
4:
endfunc
/*
 * void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
 *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
 *
 * Splits byte pairs into two planes. For every i in [0, width):
 *     dstU[i] = src1[2 * i + 0]
 *     dstV[i] = src1[2 * i + 1]
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, vr0-vr3
 *
 * 16 pairs (32 source bytes) per vector iteration, then a byte-wise loop for
 * the remaining width % 16 pairs.
 */
function nv12ToUV_lsx
    andi            t0,     a5,     15          // t0 = pairs left for the scalar loop
    srli.d          a5,     a5,     4           // a5 = number of 16-pair vector iterations
    beqz            a5,     .Lnv12ToUV_lsx_tail

.Lnv12ToUV_lsx_vec:
    vld             vr0,    a3,     0
    vld             vr1,    a3,     16
    addi.d          a3,     a3,     32
    vpickev.b       vr2,    vr1,    vr0         // even bytes of the 32 loaded
    vpickod.b       vr3,    vr1,    vr0         // odd bytes of the 32 loaded
    vst             vr2,    a0,     0
    addi.d          a0,     a0,     16
    vst             vr3,    a1,     0
    addi.d          a1,     a1,     16
    addi.d          a5,     a5,     -1
    bnez            a5,     .Lnv12ToUV_lsx_vec

.Lnv12ToUV_lsx_tail:
    beqz            t0,     .Lnv12ToUV_lsx_done

.Lnv12ToUV_lsx_byte:
    ld.b            t1,     a3,     0
    ld.b            t2,     a3,     1
    addi.d          a3,     a3,     2
    st.b            t1,     a0,     0
    addi.d          a0,     a0,     1
    st.b            t2,     a1,     0
    addi.d          a1,     a1,     1
    addi.d          t0,     t0,     -1
    bnez            t0,     .Lnv12ToUV_lsx_byte

.Lnv12ToUV_lsx_done:
endfunc
/*
 * LASX version of nv12ToUV (same arguments as nv12ToUV_lsx):
 *     dstU[i] = src1[2 * i + 0], dstV[i] = src1[2 * i + 1] for i in [0, width)
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, xr0-xr3
 *
 * 32 pairs (64 source bytes) per vector iteration, then a byte-wise loop for
 * the remaining width % 32 pairs. The picks work inside each 128-bit half, so
 * xvpermi.d 0xd8 (dword order 0,2,1,3) restores linear order before the store.
 */
function nv12ToUV_lasx
    andi            t0,     a5,     31          // t0 = pairs left for the scalar loop
    srli.d          a5,     a5,     5           // a5 = number of 32-pair vector iterations
    beqz            a5,     .Lnv12ToUV_lasx_tail

.Lnv12ToUV_lasx_vec:
    xvld            xr0,    a3,     0
    xvld            xr1,    a3,     32
    addi.d          a3,     a3,     64
    xvpickev.b      xr2,    xr1,    xr0         // even bytes, per 128-bit half
    xvpermi.d       xr2,    xr2,    0xd8
    xvpickod.b      xr3,    xr1,    xr0         // odd bytes, per 128-bit half
    xvpermi.d       xr3,    xr3,    0xd8
    xvst            xr2,    a0,     0
    addi.d          a0,     a0,     32
    xvst            xr3,    a1,     0
    addi.d          a1,     a1,     32
    addi.d          a5,     a5,     -1
    bnez            a5,     .Lnv12ToUV_lasx_vec

.Lnv12ToUV_lasx_tail:
    beqz            t0,     .Lnv12ToUV_lasx_done

.Lnv12ToUV_lasx_byte:
    ld.b            t1,     a3,     0
    ld.b            t2,     a3,     1
    addi.d          a3,     a3,     2
    st.b            t1,     a0,     0
    addi.d          a0,     a0,     1
    st.b            t2,     a1,     0
    addi.d          a1,     a1,     1
    addi.d          t0,     t0,     -1
    bnez            t0,     .Lnv12ToUV_lasx_byte

.Lnv12ToUV_lasx_done:
endfunc
/*
 * void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
 *                   const uint8_t *src2, int width, uint32_t *unused, void *opq)
 *
 * Splits byte pairs into two planes, first byte of each pair going to dstV.
 * For every i in [0, width):
 *     dstV[i] = src1[2 * i + 0]
 *     dstU[i] = src1[2 * i + 1]
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, vr0-vr3
 *
 * 16 pairs (32 source bytes) per vector iteration, then a byte-wise loop for
 * the remaining width % 16 pairs.
 */
function nv21ToUV_lsx
    andi            t0,     a5,     15          // t0 = pairs left for the scalar loop
    srli.d          a5,     a5,     4           // a5 = number of 16-pair vector iterations
    beqz            a5,     .Lnv21ToUV_lsx_tail

.Lnv21ToUV_lsx_vec:
    vld             vr0,    a3,     0
    vld             vr1,    a3,     16
    addi.d          a3,     a3,     32
    vpickev.b       vr2,    vr1,    vr0         // even bytes of the 32 loaded
    vpickod.b       vr3,    vr1,    vr0         // odd bytes of the 32 loaded
    vst             vr2,    a1,     0           // even bytes -> dstV
    addi.d          a1,     a1,     16
    vst             vr3,    a0,     0           // odd bytes  -> dstU
    addi.d          a0,     a0,     16
    addi.d          a5,     a5,     -1
    bnez            a5,     .Lnv21ToUV_lsx_vec

.Lnv21ToUV_lsx_tail:
    beqz            t0,     .Lnv21ToUV_lsx_done

.Lnv21ToUV_lsx_byte:
    ld.b            t1,     a3,     0
    ld.b            t2,     a3,     1
    addi.d          a3,     a3,     2
    st.b            t1,     a1,     0
    addi.d          a1,     a1,     1
    st.b            t2,     a0,     0
    addi.d          a0,     a0,     1
    addi.d          t0,     t0,     -1
    bnez            t0,     .Lnv21ToUV_lsx_byte

.Lnv21ToUV_lsx_done:
endfunc
/*
 * LASX version of nv21ToUV (same arguments as nv21ToUV_lsx):
 *     dstV[i] = src1[2 * i + 0], dstU[i] = src1[2 * i + 1] for i in [0, width)
 *
 * In:       a0 = dstU, a1 = dstV, a3 = src1, a5 = width
 * Clobbers: a0, a1, a3, a5, t0-t2, xr0-xr3
 *
 * 32 pairs (64 source bytes) per vector iteration, then a byte-wise loop for
 * the remaining width % 32 pairs. The picks work inside each 128-bit half, so
 * xvpermi.d 0xd8 (dword order 0,2,1,3) restores linear order before the store.
 */
function nv21ToUV_lasx
    andi            t0,     a5,     31          // t0 = pairs left for the scalar loop
    srli.d          a5,     a5,     5           // a5 = number of 32-pair vector iterations
    beqz            a5,     .Lnv21ToUV_lasx_tail

.Lnv21ToUV_lasx_vec:
    xvld            xr0,    a3,     0
    xvld            xr1,    a3,     32
    addi.d          a3,     a3,     64
    xvpickev.b      xr2,    xr1,    xr0         // even bytes, per 128-bit half
    xvpermi.d       xr2,    xr2,    0xd8
    xvpickod.b      xr3,    xr1,    xr0         // odd bytes, per 128-bit half
    xvpermi.d       xr3,    xr3,    0xd8
    xvst            xr2,    a1,     0           // even bytes -> dstV
    addi.d          a1,     a1,     32
    xvst            xr3,    a0,     0           // odd bytes  -> dstU
    addi.d          a0,     a0,     32
    addi.d          a5,     a5,     -1
    bnez            a5,     .Lnv21ToUV_lasx_vec

.Lnv21ToUV_lasx_tail:
    beqz            t0,     .Lnv21ToUV_lasx_done

.Lnv21ToUV_lasx_byte:
    ld.b            t1,     a3,     0
    ld.b            t2,     a3,     1
    addi.d          a3,     a3,     2
    st.b            t1,     a1,     0
    addi.d          a1,     a1,     1
    st.b            t2,     a0,     0
    addi.d          a0,     a0,     1
    addi.d          t0,     t0,     -1
    bnez            t0,     .Lnv21ToUV_lasx_byte

.Lnv21ToUV_lasx_done:
endfunc
/*
 * void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
 *                  const uint8_t *unused2, int width, uint32_t *unused, void *opq)
 *
 * Expands byte 0 of every 4-byte pixel to a 16-bit sample.
 * For every i in [0, width), with dst viewed as an array of 16-bit values:
 *     a      = src[4 * i + 0]
 *     dst[i] = (a << 6) | (a >> 2)
 *
 * In:       a0 = _dst, a1 = src, a4 = width
 * Clobbers: a0, a1, a4, t0-t3, vr0-vr3
 *
 * The vector loop converts 8 pixels (32 source bytes, 16 output bytes) per
 * iteration; the scalar loop handles the remaining width % 8 pixels and must
 * read the same byte (offset 0) as the vector loop.
 */
function abgrToA_lsx
    andi            t0,     a4,     7           // t0 = pixels left for the scalar loop
    srli.d          a4,     a4,     3           // a4 = number of 8-pixel vector iterations
    vxor.v          vr0,    vr0,    vr0         // vr0 = 0, used to zero-extend bytes
    beqz            a4,     2f
1:
    vld             vr1,    a1,     0
    vld             vr2,    a1,     16
    addi.d          a4,     a4,     -1
    addi.d          a1,     a1,     32
    vpickev.b       vr3,    vr2,    vr1         // even bytes: byte 0 and byte 2 of each pixel
    vpackev.b       vr3,    vr0,    vr3         // keep byte 0 of each pixel, zero-extended to 16 bit
    vslli.h         vr1,    vr3,    6
    vsrli.h         vr2,    vr3,    2
    vor.v           vr3,    vr2,    vr1         // (a << 6) | (a >> 2)
    vst             vr3,    a0,     0
    addi.d          a0,     a0,     16
    bnez            a4,     1b
2:
    beqz            t0,     4f
3:
    ld.bu           t1,     a1,     0           // byte 0 of the pixel, zero-extended
    addi.d          t0,     t0,     -1
    addi.d          a1,     a1,     4
    slli.w          t2,     t1,     6
    srli.w          t3,     t1,     2
    or              t1,     t2,     t3          // (a << 6) | (a >> 2)
    st.h            t1,     a0,     0
    addi.d          a0,     a0,     2
    bnez            t0,     3b
4:
endfunc
/*
 * LASX version of abgrToA (same arguments as abgrToA_lsx):
 *     a = src[4 * i + 0], dst[i] = (a << 6) | (a >> 2) for i in [0, width),
 *     with dst viewed as an array of 16-bit values.
 *
 * In:       a0 = _dst, a1 = src, a4 = width
 * Clobbers: a0, a1, a4, t0-t3, xr0-xr3
 *
 * The vector loop converts 16 pixels (64 source bytes, 32 output bytes) per
 * iteration; the scalar loop handles the remaining width % 16 pixels and must
 * read the same byte (offset 0) as the vector loop.
 */
function abgrToA_lasx
    andi            t0,     a4,     15          // t0 = pixels left for the scalar loop
    srli.d          a4,     a4,     4           // a4 = number of 16-pixel vector iterations
    xvxor.v         xr0,    xr0,    xr0         // xr0 = 0, used to zero-extend bytes
    beqz            a4,     2f
1:
    xvld            xr1,    a1,     0
    xvld            xr2,    a1,     32
    addi.d          a4,     a4,     -1
    addi.d          a1,     a1,     64
    xvpickev.b      xr3,    xr2,    xr1         // even bytes, picked per 128-bit half
    xvpermi.d       xr3,    xr3,    0xd8        // dword order 0,2,1,3 -> linear pixel order
    xvpackev.b      xr3,    xr0,    xr3         // keep byte 0 of each pixel, zero-extended to 16 bit
    xvslli.h        xr1,    xr3,    6
    xvsrli.h        xr2,    xr3,    2
    xvor.v          xr3,    xr2,    xr1         // (a << 6) | (a >> 2)
    xvst            xr3,    a0,     0
    addi.d          a0,     a0,     32
    bnez            a4,     1b
2:
    beqz            t0,     4f
3:
    ld.bu           t1,     a1,     0           // byte 0 of the pixel, zero-extended
    addi.d          t0,     t0,     -1
    addi.d          a1,     a1,     4
    slli.w          t2,     t1,     6
    srli.w          t3,     t1,     2
    or              t1,     t2,     t3          // (a << 6) | (a >> 2)
    st.h            t1,     a0,     0
    addi.d          a0,     a0,     2
    bnez            t0,     3b
4:
endfunc
/*
 * void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
 *                  const uint8_t *unused2, int width, uint32_t *unused, void *opq)
 *
 * Expands byte 3 of every 4-byte pixel to a 16-bit sample.
 * For every i in [0, width), with dst viewed as an array of 16-bit values:
 *     a      = src[4 * i + 3]
 *     dst[i] = (a << 6) | (a >> 2)
 *
 * In:       a0 = _dst, a1 = src, a4 = width
 * Clobbers: a0, a1, a4, t0-t3, vr1-vr3
 *
 * The vector loop converts 8 pixels (32 source bytes, 16 output bytes) per
 * iteration and loads exactly bytes [0, 32) of those pixels, so it never
 * touches memory past the data it converts. The scalar loop handles the
 * remaining width % 8 pixels.
 */
function rgbaToA_lsx
    andi            t0,     a4,     7           // t0 = pixels left for the scalar loop
    srli.d          a4,     a4,     3           // a4 = number of 8-pixel vector iterations
    beqz            a4,     2f
1:
    vld             vr1,    a1,     0
    vld             vr2,    a1,     16
    addi.d          a4,     a4,     -1
    addi.d          a1,     a1,     32
    vpickod.b       vr3,    vr2,    vr1         // odd bytes: byte 1 and byte 3 of each pixel
    vsrli.h         vr3,    vr3,    8           // keep byte 3 (high byte of each pair), zero-extended
    vslli.h         vr1,    vr3,    6
    vsrli.h         vr2,    vr3,    2
    vor.v           vr3,    vr2,    vr1         // (a << 6) | (a >> 2)
    vst             vr3,    a0,     0
    addi.d          a0,     a0,     16
    bnez            a4,     1b
2:
    beqz            t0,     4f
3:
    ld.bu           t1,     a1,     3           // byte 3 of the pixel, zero-extended
    addi.d          t0,     t0,     -1
    addi.d          a1,     a1,     4
    slli.w          t2,     t1,     6
    srli.w          t3,     t1,     2
    or              t1,     t2,     t3          // (a << 6) | (a >> 2)
    st.h            t1,     a0,     0
    addi.d          a0,     a0,     2
    bnez            t0,     3b
4:
endfunc
/*
 * LASX version of rgbaToA (same arguments as rgbaToA_lsx):
 *     a = src[4 * i + 3], dst[i] = (a << 6) | (a >> 2) for i in [0, width),
 *     with dst viewed as an array of 16-bit values.
 *
 * In:       a0 = _dst, a1 = src, a4 = width
 * Clobbers: a0, a1, a4, t0-t3, xr1-xr3
 *
 * The vector loop converts 16 pixels (64 source bytes, 32 output bytes) per
 * iteration and loads exactly bytes [0, 64) of those pixels, so it never
 * touches memory past the data it converts. The scalar loop handles the
 * remaining width % 16 pixels.
 */
function rgbaToA_lasx
    andi            t0,     a4,     15          // t0 = pixels left for the scalar loop
    srli.d          a4,     a4,     4           // a4 = number of 16-pixel vector iterations
    beqz            a4,     2f
1:
    xvld            xr1,    a1,     0
    xvld            xr2,    a1,     32
    addi.d          a4,     a4,     -1
    addi.d          a1,     a1,     64
    xvpickod.b      xr3,    xr2,    xr1         // odd bytes, picked per 128-bit half
    xvpermi.d       xr3,    xr3,    0xd8        // dword order 0,2,1,3 -> linear pixel order
    xvsrli.h        xr3,    xr3,    8           // keep byte 3 (high byte of each pair), zero-extended
    xvslli.h        xr1,    xr3,    6
    xvsrli.h        xr2,    xr3,    2
    xvor.v          xr3,    xr2,    xr1         // (a << 6) | (a >> 2)
    xvst            xr3,    a0,     0
    addi.d          a0,     a0,     32
    bnez            a4,     1b
2:
    beqz            t0,     4f
3:
    ld.bu           t1,     a1,     3           // byte 3 of the pixel, zero-extended
    addi.d          t0,     t0,     -1
    addi.d          a1,     a1,     4
    slli.w          t2,     t1,     6
    srli.w          t3,     t1,     2
    or              t1,     t2,     t3          // (a << 6) | (a >> 2)
    st.h            t1,     a0,     0
    addi.d          a0,     a0,     2
    bnez            t0,     3b
4:
endfunc

View File

@ -200,3 +200,46 @@ void planar_rgb_to_y_lasx(uint8_t *_dst, const uint8_t *src[4], int width,
dst[i] = (tem_ry * r + tem_gy * g + tem_by * b + set) >> shift;
}
}
/**
 * Install the LASX input readers that match the source pixel format of @p c.
 *
 * Depending on c->srcFormat this sets c->chrToYV12 or c->readChrPlanar, and,
 * only when c->needAlpha is set, c->alpToYV12. Formats that are not listed
 * leave the context untouched.
 *
 * NOTE(review): the GBRP/GBRAP switch that used to live in
 * ff_sws_init_swscale_loongarch() also assigned c->readLumPlanar
 * (planar_rgb_to_y_lasx); this function only assigns c->readChrPlanar.
 * Confirm that dropping the luma reader is intended.
 */
av_cold void ff_sws_init_input_lasx(SwsContext *c)
{
    const enum AVPixelFormat fmt = c->srcFormat;

    /* Chroma readers. */
    if (fmt == AV_PIX_FMT_YUYV422) {
        c->chrToYV12 = yuy2ToUV_lasx;
    } else if (fmt == AV_PIX_FMT_YVYU422) {
        c->chrToYV12 = yvy2ToUV_lasx;
    } else if (fmt == AV_PIX_FMT_UYVY422) {
        c->chrToYV12 = uyvyToUV_lasx;
    } else if (fmt == AV_PIX_FMT_NV12 ||
               fmt == AV_PIX_FMT_NV16 ||
               fmt == AV_PIX_FMT_NV24) {
        c->chrToYV12 = nv12ToUV_lasx;
    } else if (fmt == AV_PIX_FMT_NV21 ||
               fmt == AV_PIX_FMT_NV42) {
        c->chrToYV12 = nv21ToUV_lasx;
    } else if (fmt == AV_PIX_FMT_GBRAP ||
               fmt == AV_PIX_FMT_GBRP) {
        c->readChrPlanar = planar_rgb_to_uv_lasx;
    }

    /* Alpha readers are only installed when alpha is actually needed. */
    if (!c->needAlpha)
        return;

    if (fmt == AV_PIX_FMT_BGRA || fmt == AV_PIX_FMT_RGBA)
        c->alpToYV12 = rgbaToA_lasx;    /* alpha is byte 3 of each pixel */
    else if (fmt == AV_PIX_FMT_ABGR || fmt == AV_PIX_FMT_ARGB)
        c->alpToYV12 = abgrToA_lasx;    /* alpha is byte 0 of each pixel */
}

View File

@ -0,0 +1,65 @@
/*
* Copyright (C) 2024 Loongson Technology Corporation Limited
* Contributed by Shiyou Yin<yinshiyou-hf@loongson.cn>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "swscale_loongarch.h"
/**
 * Install the LSX input readers that match the source pixel format of @p c.
 *
 * Depending on c->srcFormat this sets c->chrToYV12 or c->readChrPlanar, and,
 * only when c->needAlpha is set, c->alpToYV12. Formats that are not listed
 * leave the context untouched.
 *
 * NOTE(review): the GBRP/GBRAP switch that used to live in
 * ff_sws_init_swscale_loongarch() also assigned c->readLumPlanar
 * (planar_rgb_to_y_lsx); this function only assigns c->readChrPlanar.
 * Confirm that dropping the luma reader is intended.
 */
av_cold void ff_sws_init_input_lsx(SwsContext *c)
{
    const enum AVPixelFormat fmt = c->srcFormat;

    /* Chroma readers. */
    if (fmt == AV_PIX_FMT_YUYV422) {
        c->chrToYV12 = yuy2ToUV_lsx;
    } else if (fmt == AV_PIX_FMT_YVYU422) {
        c->chrToYV12 = yvy2ToUV_lsx;
    } else if (fmt == AV_PIX_FMT_UYVY422) {
        c->chrToYV12 = uyvyToUV_lsx;
    } else if (fmt == AV_PIX_FMT_NV12 ||
               fmt == AV_PIX_FMT_NV16 ||
               fmt == AV_PIX_FMT_NV24) {
        c->chrToYV12 = nv12ToUV_lsx;
    } else if (fmt == AV_PIX_FMT_NV21 ||
               fmt == AV_PIX_FMT_NV42) {
        c->chrToYV12 = nv21ToUV_lsx;
    } else if (fmt == AV_PIX_FMT_GBRAP ||
               fmt == AV_PIX_FMT_GBRP) {
        c->readChrPlanar = planar_rgb_to_uv_lsx;
    }

    /* Alpha readers are only installed when alpha is actually needed. */
    if (!c->needAlpha)
        return;

    if (fmt == AV_PIX_FMT_BGRA || fmt == AV_PIX_FMT_RGBA)
        c->alpToYV12 = rgbaToA_lsx;     /* alpha is byte 3 of each pixel */
    else if (fmt == AV_PIX_FMT_ABGR || fmt == AV_PIX_FMT_ARGB)
        c->alpToYV12 = abgrToA_lsx;     /* alpha is byte 0 of each pixel */
}

View File

@ -63,6 +63,7 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1,
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
ff_sws_init_input_lsx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lsx;
@ -73,21 +74,13 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lsx
: ff_hscale_16_to_15_lsx;
}
switch (c->srcFormat) {
case AV_PIX_FMT_GBRAP:
case AV_PIX_FMT_GBRP:
{
c->readChrPlanar = planar_rgb_to_uv_lsx;
c->readLumPlanar = planar_rgb_to_y_lsx;
}
break;
}
}
#if HAVE_LASX
if (have_lasx(cpu_flags)) {
ff_sws_init_output_lasx(c, &c->yuv2plane1, &c->yuv2planeX,
&c->yuv2nv12cX, &c->yuv2packed1,
&c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
ff_sws_init_input_lasx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lasx;
@ -98,15 +91,6 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lasx
: ff_hscale_16_to_15_lasx;
}
switch (c->srcFormat) {
case AV_PIX_FMT_GBRAP:
case AV_PIX_FMT_GBRP:
{
c->readChrPlanar = planar_rgb_to_uv_lasx;
c->readLumPlanar = planar_rgb_to_y_lasx;
}
break;
}
}
#endif // #if HAVE_LASX
ff_sws_init_range_convert_loongarch(c);

View File

@ -68,6 +68,29 @@ void yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
void yuv2plane1_8_lsx(const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
const uint8_t *unused2, int width, uint32_t *unused, void *opq);
void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
const uint8_t *unused2, int width, uint32_t *unused, void *opq);
av_cold void ff_sws_init_input_lsx(SwsContext *c);
av_cold void ff_sws_init_output_lsx(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,
@ -152,6 +175,29 @@ void yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
void yuv2plane1_8_lasx(const int16_t *src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
void yuy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void yvy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void uyvyToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void nv12ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void nv21ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused, void *opq);
void abgrToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
const uint8_t *unused2, int width, uint32_t *unused, void *opq);
void rgbaToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
const uint8_t *unused2, int width, uint32_t *unused, void *opq);
av_cold void ff_sws_init_input_lasx(SwsContext *c);
av_cold void ff_sws_init_output_lasx(SwsContext *c,
yuv2planar1_fn *yuv2plane1,
yuv2planarX_fn *yuv2planeX,