From bbb0fdedb78c289f1d3ce3fccb000f031ccfa748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?=
Date: Sat, 16 Nov 2024 16:22:49 +0200
Subject: [PATCH] lavc/h264idct: fix RISC-V group multiplier

After the branch, the expected SEW/LMUL ratio is 1 byte/vector. So we
have to set the same ratio before branching (QEMU does not care, but
real hardware does).
---
 libavcodec/riscv/h264idct_rvv.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index fda1f37a3c..5adcfd295e 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -644,7 +644,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
         sd      a4, 24(sp)
         ld      a0, 0(a0) # dest[0]
         addi    a1, a1, 16 * 4 # &block_offset[16]
-        vsetivli zero, 4, e8, mf4, ta, ma
+        vsetivli zero, 4, e8, m1, ta, ma
         jal     .Lidct4_add4_\depth\()_rvv
 
         ld      a4, 24(sp) # nnzc
@@ -655,7 +655,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
         ld      a0, 8(a0) # dest[1]
         lla     t0, ff_h264_scan8 + 32
 .ifc \type, 8_422
-        vsetivli zero, 4, e8, mf4, ta, ma
+        vsetivli zero, 4, e8, m1, ta, ma
         jal     .Lidct4_add4_\depth\()_rvv
 
         ld      a4, 24(sp) # nnzc
@@ -665,7 +665,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
         addi    a1, t5, (-8 - 4) * 4 # &block_offset[24]
         ld      a0, 0(a0) # dest[0]
         lla     t0, ff_h264_scan8 + 24
-        vsetivli zero, 4, e8, mf4, ta, ma
+        vsetivli zero, 4, e8, m1, ta, ma
         jal     .Lidct4_add4_\depth\()_rvv
 
         ld      a4, 24(sp) # nnzc
@@ -679,7 +679,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
         ld      ra, 8(sp)
         ld      s0, 0(sp)
         addi    sp, sp, 32
-        vsetivli zero, 4, e8, mf4, ta, ma
+        vsetivli zero, 4, e8, m1, ta, ma
         j       .Lidct4_add4_\depth\()_rvv
 endfunc
 .endm