226 lines
8.8 KiB
Diff
226 lines
8.8 KiB
Diff
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
|
index 00f93bf59f..52da7036f3 100644
|
|
--- a/libavcodec/aarch64/Makefile
|
|
+++ b/libavcodec/aarch64/Makefile
|
|
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
|
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
|
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
|
+OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
|
|
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
|
|
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
|
|
@@ -21,6 +22,7 @@ OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
|
|
OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9dsp_init_10bpp_aarch64.o \
|
|
aarch64/vp9dsp_init_12bpp_aarch64.o \
|
|
+ aarch64/vp9mc_aarch64.o \
|
|
aarch64/vp9dsp_init_aarch64.o
|
|
|
|
# ARMv8 optimizations
|
|
@@ -41,8 +43,7 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o
|
|
NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
|
|
aarch64/hpeldsp_neon.o
|
|
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
|
|
-NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o \
|
|
- aarch64/simple_idct_neon.o
|
|
+NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/simple_idct_neon.o
|
|
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
|
|
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
|
|
NEON-OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_neon.o
|
|
diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c
|
|
index 0406e60830..742a3372e3 100644
|
|
--- a/libavcodec/aarch64/idctdsp_init_aarch64.c
|
|
+++ b/libavcodec/aarch64/idctdsp_init_aarch64.c
|
|
@@ -21,6 +21,8 @@
|
|
*/
|
|
|
|
#include "libavutil/attributes.h"
|
|
+#include "libavutil/cpu.h"
|
|
+#include "libavutil/arm/cpu.h"
|
|
#include "libavcodec/avcodec.h"
|
|
#include "libavcodec/idctdsp.h"
|
|
#include "idct.h"
|
|
@@ -28,7 +30,9 @@
|
|
av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
|
|
unsigned high_bit_depth)
|
|
{
|
|
- if (!avctx->lowres && !high_bit_depth) {
|
|
+ int cpu_flags = av_get_cpu_flags();
|
|
+
|
|
+ if (have_neon(cpu_flags) && !avctx->lowres && !high_bit_depth) {
|
|
if (avctx->idct_algo == FF_IDCT_AUTO ||
|
|
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
|
|
avctx->idct_algo == FF_IDCT_SIMPLENEON) {
|
|
diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
|
|
index cac6428709..53b372c262 100644
|
|
--- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
|
|
+++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
|
|
@@ -25,31 +25,6 @@
|
|
// const uint8_t *ref, ptrdiff_t ref_stride,
|
|
// int h, int mx, int my);
|
|
|
|
-function ff_vp9_copy128_aarch64, export=1
|
|
-1:
|
|
- ldp x5, x6, [x2]
|
|
- ldp x7, x8, [x2, #16]
|
|
- stp x5, x6, [x0]
|
|
- ldp x9, x10, [x2, #32]
|
|
- stp x7, x8, [x0, #16]
|
|
- subs w4, w4, #1
|
|
- ldp x11, x12, [x2, #48]
|
|
- stp x9, x10, [x0, #32]
|
|
- stp x11, x12, [x0, #48]
|
|
- ldp x5, x6, [x2, #64]
|
|
- ldp x7, x8, [x2, #80]
|
|
- stp x5, x6, [x0, #64]
|
|
- ldp x9, x10, [x2, #96]
|
|
- stp x7, x8, [x0, #80]
|
|
- ldp x11, x12, [x2, #112]
|
|
- stp x9, x10, [x0, #96]
|
|
- stp x11, x12, [x0, #112]
|
|
- add x2, x2, x3
|
|
- add x0, x0, x1
|
|
- b.ne 1b
|
|
- ret
|
|
-endfunc
|
|
-
|
|
function ff_vp9_avg64_16_neon, export=1
|
|
mov x5, x0
|
|
sub x1, x1, #64
|
|
diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
|
|
new file mode 100644
|
|
index 0000000000..f17a8cf04a
|
|
--- /dev/null
|
|
+++ b/libavcodec/aarch64/vp9mc_aarch64.S
|
|
@@ -0,0 +1,81 @@
|
|
+/*
|
|
+ * Copyright (c) 2016 Google Inc.
|
|
+ *
|
|
+ * This file is part of FFmpeg.
|
|
+ *
|
|
+ * FFmpeg is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU Lesser General Public
|
|
+ * License as published by the Free Software Foundation; either
|
|
+ * version 2.1 of the License, or (at your option) any later version.
|
|
+ *
|
|
+ * FFmpeg is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * Lesser General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU Lesser General Public
|
|
+ * License along with FFmpeg; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ */
|
|
+
|
|
+#include "libavutil/aarch64/asm.S"
|
|
+
|
|
+// All public functions in this file have the following signature:
|
|
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
|
|
+// const uint8_t *ref, ptrdiff_t ref_stride,
|
|
+// int h, int mx, int my);
|
|
+
|
|
+function ff_vp9_copy128_aarch64, export=1
|
|
+1:
|
|
+ ldp x5, x6, [x2]
|
|
+ ldp x7, x8, [x2, #16]
|
|
+ stp x5, x6, [x0]
|
|
+ ldp x9, x10, [x2, #32]
|
|
+ stp x7, x8, [x0, #16]
|
|
+ subs w4, w4, #1
|
|
+ ldp x11, x12, [x2, #48]
|
|
+ stp x9, x10, [x0, #32]
|
|
+ stp x11, x12, [x0, #48]
|
|
+ ldp x5, x6, [x2, #64]
|
|
+ ldp x7, x8, [x2, #80]
|
|
+ stp x5, x6, [x0, #64]
|
|
+ ldp x9, x10, [x2, #96]
|
|
+ stp x7, x8, [x0, #80]
|
|
+ ldp x11, x12, [x2, #112]
|
|
+ stp x9, x10, [x0, #96]
|
|
+ stp x11, x12, [x0, #112]
|
|
+ add x2, x2, x3
|
|
+ add x0, x0, x1
|
|
+ b.ne 1b
|
|
+ ret
|
|
+endfunc
|
|
+
|
|
+function ff_vp9_copy64_aarch64, export=1
|
|
+1:
|
|
+ ldp x5, x6, [x2]
|
|
+ ldp x7, x8, [x2, #16]
|
|
+ stp x5, x6, [x0]
|
|
+ ldp x9, x10, [x2, #32]
|
|
+ stp x7, x8, [x0, #16]
|
|
+ subs w4, w4, #1
|
|
+ ldp x11, x12, [x2, #48]
|
|
+ stp x9, x10, [x0, #32]
|
|
+ stp x11, x12, [x0, #48]
|
|
+ add x2, x2, x3
|
|
+ add x0, x0, x1
|
|
+ b.ne 1b
|
|
+ ret
|
|
+endfunc
|
|
+
|
|
+function ff_vp9_copy32_aarch64, export=1
|
|
+1:
|
|
+ ldp x5, x6, [x2]
|
|
+ ldp x7, x8, [x2, #16]
|
|
+ stp x5, x6, [x0]
|
|
+ subs w4, w4, #1
|
|
+ stp x7, x8, [x0, #16]
|
|
+ add x2, x2, x3
|
|
+ add x0, x0, x1
|
|
+ b.ne 1b
|
|
+ ret
|
|
+endfunc
|
|
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
|
|
index f67624ca04..abf2bae9db 100644
|
|
--- a/libavcodec/aarch64/vp9mc_neon.S
|
|
+++ b/libavcodec/aarch64/vp9mc_neon.S
|
|
@@ -25,23 +25,6 @@
|
|
// const uint8_t *ref, ptrdiff_t ref_stride,
|
|
// int h, int mx, int my);
|
|
|
|
-function ff_vp9_copy64_aarch64, export=1
|
|
-1:
|
|
- ldp x5, x6, [x2]
|
|
- ldp x7, x8, [x2, #16]
|
|
- stp x5, x6, [x0]
|
|
- ldp x9, x10, [x2, #32]
|
|
- stp x7, x8, [x0, #16]
|
|
- subs w4, w4, #1
|
|
- ldp x11, x12, [x2, #48]
|
|
- stp x9, x10, [x0, #32]
|
|
- stp x11, x12, [x0, #48]
|
|
- add x2, x2, x3
|
|
- add x0, x0, x1
|
|
- b.ne 1b
|
|
- ret
|
|
-endfunc
|
|
-
|
|
function ff_vp9_avg64_neon, export=1
|
|
mov x5, x0
|
|
1:
|
|
@@ -64,19 +47,6 @@ function ff_vp9_avg64_neon, export=1
|
|
ret
|
|
endfunc
|
|
|
|
-function ff_vp9_copy32_aarch64, export=1
|
|
-1:
|
|
- ldp x5, x6, [x2]
|
|
- ldp x7, x8, [x2, #16]
|
|
- stp x5, x6, [x0]
|
|
- subs w4, w4, #1
|
|
- stp x7, x8, [x0, #16]
|
|
- add x2, x2, x3
|
|
- add x0, x0, x1
|
|
- b.ne 1b
|
|
- ret
|
|
-endfunc
|
|
-
|
|
function ff_vp9_avg32_neon, export=1
|
|
1:
|
|
ld1 {v2.16b, v3.16b}, [x2], x3
|