mirror of https://git.ffmpeg.org/ffmpeg.git
rv34: NEON optimised 4x4 dequant
Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
40901fc14e
commit
4722a03c75
|
@ -25,9 +25,12 @@
|
|||
|
||||
/* Inverse-transform entry points; the init function below stores them in
 * rv34_inv_transform_tab[0] and [1] respectively.
 * NOTE(review): presumably both are implemented in NEON assembly -- confirm. */
void ff_rv34_inv_transform_neon(DCTELEM *block);
|
||||
void ff_rv34_inv_transform_noround_neon(DCTELEM *block);
|
||||
/* Dequantiser entry point; the init function below stores it in
 * rv34_dequant4x4. Takes the coefficient block plus two multipliers,
 * Qdc and Q. */
void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);
|
||||
|
||||
/**
 * Point the RV34 DSP context at the NEON implementations.
 *
 * @param c   context whose function pointers are overwritten
 * @param dsp not referenced in this function body
 */
void ff_rv34dsp_init_neon(RV34DSPContext *c, DSPContext* dsp)
{
    /* 4x4 dequantiser */
    c->rv34_dequant4x4 = ff_rv34_dequant4x4_neon;

    /* inverse transforms: slot 0 is the plain variant, slot 1 the "noround" one */
    c->rv34_inv_transform_tab[1] = ff_rv34_inv_transform_noround_neon;
    c->rv34_inv_transform_tab[0] = ff_rv34_inv_transform_neon;
}
|
||||
|
|
|
@ -107,3 +107,27 @@ function ff_rv34_inv_transform_noround_neon, export=1
|
|||
vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r2,:64], r1
|
||||
bx lr
|
||||
endfunc
|
||||
|
||||
@ void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q)
@
@ In-place scaling of a 4x4 group of signed 16-bit coefficients.
@ Element 0 of the first row is multiplied by Qdc, the other fifteen
@ elements by Q; every product is then shifted right by 4 with rounding
@ and saturated back to 16 bits.
@
@ In (AAPCS):  r0 = block, r1 = Qdc, r2 = Q
@              (only the low 16 bits of Qdc and Q are used)
@ Layout:      4 rows, 16 bytes apart; only the first 8 bytes
@              (four 16-bit lanes) of each row are read and written.
@              Every row address carries a :64 (8-byte) alignment hint.
@ Writes:      r0, r3, r12, d0-d7, d16-d17
@ NOTE(review): the code treats each element as 16 bits wide -- confirm
@ that DCTELEM is a 16-bit type.
function ff_rv34_dequant4x4_neon, export=1
|
||||
@ r3 = second copy of the block pointer, used later as the store cursor
mov r3, r0
|
||||
@ r12 = row pitch in bytes, used as post-increment for every load/store
mov r12, #16
|
||||
@ q0 (d0 and d1) = Q replicated into all eight 16-bit lanes
vdup.16 q0, r2
|
||||
@ lane 0 of d0 = Qdc, so d0 = {Qdc, Q, Q, Q} and d1 = {Q, Q, Q, Q}
vmov.16 d0[0], r1
|
||||
@ Load the four rows (4 x 16-bit each); r0 advances by 16 after each load
vld1.16 {d2}, [r0,:64], r12
|
||||
vld1.16 {d4}, [r0,:64], r12
|
||||
vld1.16 {d6}, [r0,:64], r12
|
||||
vld1.16 {d16}, [r0,:64], r12
|
||||
@ Signed widening multiply to 32 bits. Row 0 uses d0 (Qdc in lane 0),
@ rows 1-3 use d1 (Q in every lane). Each destination q register
@ overlaps its own source d register, which is no longer needed.
vmull.s16 q1, d2, d0
|
||||
vmull.s16 q2, d4, d1
|
||||
vmull.s16 q3, d6, d1
|
||||
vmull.s16 q8, d16, d1
|
||||
@ Rounding shift right by 4, narrowing to 16 bits with signed saturation
vqrshrn.s32 d2, q1, #4
|
||||
vqrshrn.s32 d4, q2, #4
|
||||
vqrshrn.s32 d6, q3, #4
|
||||
vqrshrn.s32 d16, q8, #4
|
||||
@ Store the rows back over the input; r3 advances by 16 after each store
vst1.16 {d2}, [r3,:64], r12
|
||||
vst1.16 {d4}, [r3,:64], r12
|
||||
vst1.16 {d6}, [r3,:64], r12
|
||||
vst1.16 {d16}, [r3,:64], r12
|
||||
@ Return to caller
bx lr
|
||||
endfunc
|
||||
|
|
Loading…
Reference in New Issue