lavc/h264idct: fix RISC-V group multiplier

After the branch, the code expects an SEW/LMUL ratio of 1 byte/vector,
so we have to set the same ratio before branching (QEMU does not care,
but real hardware does).
This commit is contained in:
Rémi Denis-Courmont 2024-11-16 16:22:49 +02:00
parent 8a5b74f98b
commit bbb0fdedb7
1 changed file with 4 additions and 4 deletions

View File

@@ -644,7 +644,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
sd a4, 24(sp) sd a4, 24(sp)
ld a0, 0(a0) # dest[0] ld a0, 0(a0) # dest[0]
addi a1, a1, 16 * 4 # &block_offset[16] addi a1, a1, 16 * 4 # &block_offset[16]
vsetivli zero, 4, e8, mf4, ta, ma vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc ld a4, 24(sp) # nnzc
@@ -655,7 +655,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
ld a0, 8(a0) # dest[1] ld a0, 8(a0) # dest[1]
lla t0, ff_h264_scan8 + 32 lla t0, ff_h264_scan8 + 32
.ifc \type, 8_422 .ifc \type, 8_422
vsetivli zero, 4, e8, mf4, ta, ma vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc ld a4, 24(sp) # nnzc
@@ -665,7 +665,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
addi a1, t5, (-8 - 4) * 4 # &block_offset[24] addi a1, t5, (-8 - 4) * 4 # &block_offset[24]
ld a0, 0(a0) # dest[0] ld a0, 0(a0) # dest[0]
lla t0, ff_h264_scan8 + 24 lla t0, ff_h264_scan8 + 24
vsetivli zero, 4, e8, mf4, ta, ma vsetivli zero, 4, e8, m1, ta, ma
jal .Lidct4_add4_\depth\()_rvv jal .Lidct4_add4_\depth\()_rvv
ld a4, 24(sp) # nnzc ld a4, 24(sp) # nnzc
@@ -679,7 +679,7 @@ func ff_h264_idct4_add\type\()_\depth\()_rvv, zve32x
ld ra, 8(sp) ld ra, 8(sp)
ld s0, 0(sp) ld s0, 0(sp)
addi sp, sp, 32 addi sp, sp, 32
vsetivli zero, 4, e8, mf4, ta, ma vsetivli zero, 4, e8, m1, ta, ma
j .Lidct4_add4_\depth\()_rvv j .Lidct4_add4_\depth\()_rvv
endfunc endfunc
.endm .endm