lavc/h264dsp: R-V V 8-bit h264_idct_add16intra

This commit is contained in:
Rémi Denis-Courmont 2024-07-01 23:41:37 +03:00
parent 30475c95ba
commit d1f0c1fbf8
2 changed files with 72 additions and 0 deletions

View File

@ -37,6 +37,9 @@ void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
/* 8-bit h264_idct_add16 entry point; its body is not part of this excerpt. */
void ff_h264_idct_add16_8_rvv(uint8_t *dst, const int *blockoffset,
int16_t *block, int stride,
const uint8_t nnzc[5 * 8]);
/*
 * 8-bit h264_idct_add16intra entry point, implemented in assembly.
 *
 * For each of 16 blocks i the assembly looks up nnzc[ff_h264_scan8[i]]:
 *  - non-zero: calls the full IDCT-add helper on dst + blockoffset[i];
 *  - zero, but the first coefficient of block i is non-zero: calls the
 *    DC-only IDCT-add helper instead;
 *  - otherwise the block is skipped.
 * Block i starts at block + 16 * i coefficients; stride is forwarded to the
 * helpers unchanged.
 *
 * The assembly saves registers with sd/ld, which is why the function is only
 * installed when __riscv_xlen == 64.
 */
void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
int16_t *block, int stride,
const uint8_t nnzc[5 * 8]);
/*
 * Defined outside this excerpt. The _rvv variant is installed as
 * dsp->startcode_find_candidate by ff_h264dsp_init_riscv().
 */
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@ -59,6 +62,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
# if __riscv_xlen == 64
dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
# endif
}
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;

View File

@ -102,5 +102,73 @@ func ff_h264_idct_add16_\depth\()_rvv, zve32x
addi sp, sp, 80
ret
endfunc
/*
 * void ff_h264_idct_add16intra_<depth>_rvv(uint8_t *dst, const int *blockoffset,
 *                                          int16_t *block, int stride,
 *                                          const uint8_t nnzc[5 * 8]);
 *
 * Equivalent C:
 *     for (i = 0; i < 16; i++) {
 *         if (nnzc[ff_h264_scan8[i]])
 *             ff_h264_idct_add_<depth>_c(dst + blockoffset[i], blk_i, stride);
 *         else if (first coefficient of blk_i != 0)
 *             ff_h264_idct_dc_add_<depth>_c(dst + blockoffset[i], blk_i, stride);
 *     }
 * where blk_i advances by 16 coefficients per iteration.
 *
 * In:  a0 = dst, a1 = blockoffset, a2 = block, a3 = stride, a4 = nnzc
 * RV64 only: registers are spilled with sd/ld into an 80-byte frame.
 *
 * Both per-block conditions are evaluated for all 16 blocks up front with
 * vector compares, then packed into two scalar bit masks that the scalar loop
 * consumes one bit per iteration (bit 0 first).
 *
 * Registers live across the helper calls:
 *     s1 = blocks remaining        s2 = nnzc mask       s3 = DC mask
 *     s4 = dst                     s5 = &blockoffset[i] s6 = current block
 *     s7 = stride
 *
 * The first coefficient of each block is loaded at the coefficient width
 * (16-bit for depth 8, 32-bit otherwise) so that the compare that follows,
 * which runs at e16 resp. e32, operands on whole coefficients and only on
 * registers that were actually loaded.
 *
 * NOTE(review): the (\depth > 8) shift amounts assume the assembler evaluates
 * a true comparison to 1; confirm before instantiating for depths above 8.
 */
func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
        addi    sp, sp, -80
        lla     t0, ff_h264_scan8
        sd      s0,   (sp)
        li      t1, 32 << (\depth > 8)  # byte stride between blocks (32 for depth 8)
        mv      s0, sp
        sd      ra,  8(sp)
        sd      s1, 16(sp)
        sd      s2, 24(sp)
        sd      s3, 32(sp)
        sd      s4, 40(sp)
        sd      s5, 48(sp)
        sd      s6, 56(sp)
        sd      s7, 64(sp)
        vsetivli  zero, 16, e8, m1, ta, ma
        vle8.v    v8, (t0)              # v8[i] = ff_h264_scan8[i], i = 0..15
    .if \depth == 8
        vlse16.v  v16, (a2), t1         # v16[i] = first coefficient of block i
    .else
        vlse32.v  v16, (a2), t1         # same, 32-bit coefficients
    .endif
        vluxei8.v v12, (a4), v8         # v12[i] = nnzc[scan8[i]]
    .if \depth == 8
        vsetvli   zero, zero, e16, m2, ta, ma
    .else
        vsetvli   zero, zero, e32, m4, ta, ma
    .endif
        vmsne.vi  v1, v16, 0            # v1 mask: block i has a non-zero DC
        vsetvli   zero, zero, e8, m1, ta, ma
        vmsne.vi  v0, v12, 0            # v0 mask: nnzc[scan8[i]] != 0
        vsetvli   zero, zero, e16, m2, ta, ma
        vmv.x.s   s2, v0                # 16 mask bits read as one e16 element
        vmv.x.s   s3, v1
        li      s1, 16
        mv      s4, a0
        mv      s5, a1
        mv      s6, a2
        mv      s7, a3
1:
        andi    t0, s2, 1               # t0 = nnzc flag of the current block
        addi    s1, s1, -1
        srli    s2, s2, 1
        lw      t2, (s5) # block_offset[i]
        andi    t1, s3, 1               # t1 = DC flag of the current block
        mv      a1, s6
        mv      a2, s7
        add     a0, s4, t2              # a0 = dst + block_offset[i]
        beqz    t0, 2f # if (nnzc[scan8[i]])
        call    ff_h264_idct_add_\depth\()_c
        j       3f
2:
        beqz    t1, 3f # if (block[i * 16])
        call    ff_h264_idct_dc_add_\depth\()_c
3:
        srli    s3, s3, 1
        addi    s5, s5, 4
        addi    s6, s6, 16 * 2 << (\depth > 8)  # advance by 16 coefficients
        bnez    s1, 1b
        ld      s7, 64(sp)
        ld      s6, 56(sp)
        ld      s5, 48(sp)
        ld      s4, 40(sp)
        ld      s3, 32(sp)
        ld      s2, 24(sp)
        ld      s1, 16(sp)
        ld      ra,  8(sp)
        ld      s0,  0(sp)
        addi    sp, sp, 80
        ret
endfunc
.endr
#endif