mirror of https://git.ffmpeg.org/ffmpeg.git
avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1
Benchmark results (lower is better; speedup relative to the C version in parentheses):

                      A55               A76
pix_norm1_c:          484.3             235.2
pix_norm1_neon:       193.8 ( 2.50x)     44.7 ( 5.26x)
pix_norm1_dotprod:     91.8 ( 5.28x)     21.2 (11.09x)
This commit is contained in:
parent
9f68a3712e
commit
8c203ea7c7
|
@ -27,6 +27,10 @@
|
|||
int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
|
||||
int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
|
||||
|
||||
#if HAVE_DOTPROD
|
||||
int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
|
||||
#endif
|
||||
|
||||
av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
|
||||
AVCodecContext *avctx)
|
||||
{
|
||||
|
@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
|
|||
c->pix_sum = ff_pix_sum16_neon;
|
||||
c->pix_norm1 = ff_pix_norm1_neon;
|
||||
}
|
||||
|
||||
#if HAVE_DOTPROD
|
||||
if (have_dotprod(cpu_flags)) {
|
||||
c->pix_norm1 = ff_pix_norm1_neon_dotprod;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -66,3 +66,31 @@ function ff_pix_norm1_neon, export=1
|
|||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
#if HAVE_DOTPROD
|
||||
ENABLE_DOTPROD
|
||||
|
||||
function ff_pix_norm1_neon_dotprod, export=1
|
||||
// Sums the squares of the bytes in 16 rows of 16 bytes each, starting at
// pix and stepping line_size bytes between rows; the total is returned in w0.
// x0 const uint8_t *pix        (post-incremented by line_size after each row load)
|
||||
// x1 int line_size            (byte distance from one row to the next)
|
||||
|
||||
sxtw x1, w1 // widen the signed 32-bit line_size to 64 bits so it can be used as a post-index register
|
||||
movi v0.16b, #0 // v0 = four 32-bit accumulator lanes, cleared
|
||||
mov w2, #16 // w2 = number of rows still to process
|
||||
|
||||
1: // each pass through the loop consumes two rows
|
||||
ld1 {v1.16b}, [x0], x1 // v1 = 16 bytes of the current row, then x0 += line_size
|
||||
ld1 {v2.16b}, [x0], x1 // v2 = 16 bytes of the following row, then x0 += line_size
|
||||
udot v0.4s, v1.16b, v1.16b // each 32-bit lane += sum of squares of its four bytes of v1
|
||||
// NOTE(review): the counter update sits between the two udot instructions,
// presumably for instruction scheduling -- confirm before reordering.
subs w2, w2, #2 // two rows done; sets the flags tested by b.ne below
|
||||
udot v0.4s, v2.16b, v2.16b // same accumulation for the second row
|
||||
b.ne 1b // repeat until the row counter reaches zero
|
||||
|
||||
uaddlv d0, v0.4s // add the four 32-bit lanes into a single 64-bit scalar
|
||||
fmov w0, s0 // return value = low 32 bits of that sum
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
DISABLE_DOTPROD
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue