mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2025-01-30 03:13:18 +00:00
ARM: add new h264 idct functions
Originally committed as revision 16312 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
337e3fd990
commit
760badc1df
@ -94,6 +94,15 @@ void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
|
|||||||
|
|
||||||
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
|
void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
|
||||||
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
|
void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
|
||||||
|
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
|
||||||
|
DCTELEM *block, int stride,
|
||||||
|
const uint8_t nnzc[6*8]);
|
||||||
|
void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
|
||||||
|
DCTELEM *block, int stride,
|
||||||
|
const uint8_t nnzc[6*8]);
|
||||||
|
void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
|
||||||
|
DCTELEM *block, int stride,
|
||||||
|
const uint8_t nnzc[6*8]);
|
||||||
|
|
||||||
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||||
{
|
{
|
||||||
@ -166,4 +175,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
c->h264_idct_add = ff_h264_idct_add_neon;
|
c->h264_idct_add = ff_h264_idct_add_neon;
|
||||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
|
||||||
|
c->h264_idct_add16 = ff_h264_idct_add16_neon;
|
||||||
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||||
|
c->h264_idct_add8 = ff_h264_idct_add8_neon;
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
#include "asm.S"
|
#include "asm.S"
|
||||||
|
|
||||||
|
preserve8
|
||||||
.fpu neon
|
.fpu neon
|
||||||
|
|
||||||
.text
|
.text
|
||||||
@ -94,3 +95,95 @@ function ff_h264_idct_dc_add_neon, export=1
|
|||||||
vst1.32 {d1[1]}, [r0,:32], r2
|
vst1.32 {d1[1]}, [r0,:32], r2
|
||||||
bx lr
|
bx lr
|
||||||
.endfunc
|
.endfunc
|
||||||
|
|
||||||
|
/*
 * void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
 *                              DCTELEM *block, int stride,
 *                              const uint8_t nnzc[6*8])
 *
 * Walks the first 16 entries of scan8.  For entry i the byte
 * nnzc[scan8[i]] decides what happens to the i-th 32-byte block:
 *   0                                         -> nothing
 *   1 and first 16-bit coefficient non-zero   -> ff_h264_idct_dc_add_neon
 *   anything else                             -> ff_h264_idct_add_neon
 * The callee is entered with r0 = dst + block_offset[i], r1 = current
 * block, r2 = stride.
 *
 * Register roles inside the loop:
 *   r4  dst base             r5  block_offset cursor
 *   r6  nnzc                 r7  scan8 cursor
 *   r1  current block        r2  stride
 *   r12 blocks remaining     r8, lr  scratch
 * r1, r2 and r12 stay live across the indirect call, so this routine
 * relies on both callees leaving them untouched.
 */
function ff_h264_idct_add16_neon, export=1
        stmfd           sp!, {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ fifth argument, just above the 6 saved words
        movw            r7,  #:lower16:scan8
        movt            r7,  #:upper16:scan8
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        mov             r12, #16
1:      ldr             r0,  [r5], #4           @ block_offset[i]
        ldrb            r8,  [r7], #1           @ scan8[i]
        ldrb            r8,  [r6, r8]           @ nnzc[scan8[i]]
        subs            r8,  r8,  #1            @ Z set only when the count was exactly 1
        blt             2f                      @ count was 0: skip this block
        add             r0,  r0,  r4
        ldrsh           lr,  [r1]               @ first coefficient of the block
        movne           lr,  #0                 @ count was not 1: force the full transform
        cmp             lr,  #0
        adreq           lr,  ff_h264_idct_add_neon
        adrne           lr,  ff_h264_idct_dc_add_neon
        blx             lr
2:      add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        subs            r12, r12, #1
        bne             1b
        ldmfd           sp!, {r4-r8,pc}
        .endfunc
|
||||||
|
|
||||||
|
/*
 * void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
 *                                   DCTELEM *block, int stride,
 *                                   const uint8_t nnzc[6*8])
 *
 * Walks the first 16 entries of scan8.  For entry i:
 *   nnzc[scan8[i]] non-zero                   -> ff_h264_idct_add_neon
 *   else first 16-bit coefficient non-zero    -> ff_h264_idct_dc_add_neon
 *   else                                      -> nothing
 * The callee is entered with r0 = dst + block_offset[i], r1 = current
 * block, r2 = stride.
 *
 * Register roles inside the loop:
 *   r4  dst base             r5  block_offset cursor
 *   r6  nnzc                 r7  scan8 cursor
 *   r1  current block        r2  stride
 *   r12 blocks remaining     r8, lr  scratch
 * r1, r2 and r12 stay live across the indirect call, so this routine
 * relies on both callees leaving them untouched.
 */
function ff_h264_idct_add16intra_neon, export=1
        stmfd           sp!, {r4-r8,lr}
        ldr             r6,  [sp, #24]          @ fifth argument, just above the 6 saved words
        movw            r7,  #:lower16:scan8
        movt            r7,  #:upper16:scan8
        mov             r4,  r0
        mov             r5,  r1
        mov             r1,  r2
        mov             r2,  r3
        mov             r12, #16
1:      ldr             r0,  [r5], #4           @ block_offset[i]
        ldrb            r8,  [r7], #1           @ scan8[i]
        add             r0,  r0,  r4
        ldrb            r8,  [r6, r8]           @ nnzc[scan8[i]]
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ first coefficient; load leaves the flags alone
        adreq           lr,  ff_h264_idct_dc_add_neon
        adrne           lr,  ff_h264_idct_add_neon
        cmpeq           r8,  #0                 @ count was 0: call only if the coefficient is non-zero
        blxne           lr
        add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        subs            r12, r12, #1
        bne             1b
        ldmfd           sp!, {r4-r8,pc}
        .endfunc
|
||||||
|
|
||||||
|
/*
 * void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
 *                             DCTELEM *block, int stride,
 *                             const uint8_t nnzc[6*8])
 *
 * Processes the 8 blocks described by scan8[16..23], starting 16 entries
 * into block_offset and 16 blocks (16*32 bytes) into block.  The first
 * four blocks are added to dest[0], the last four to dest[1].
 * For each block:
 *   nnzc[scan8[i]] non-zero                   -> ff_h264_idct_add_neon
 *   else first 16-bit coefficient non-zero    -> ff_h264_idct_dc_add_neon
 *   else                                      -> nothing
 * The callee is entered with r0 = plane + block_offset[i], r1 = current
 * block, r2 = stride.
 *
 * Register roles inside the loop:
 *   r4  dest[0]              r9  dest[1]
 *   r5  block_offset cursor  r6  nnzc
 *   r7  scan8 cursor         r1  current block      r2  stride
 *   ip  counts 7 down to 0; bit 2 is set for the first four blocks
 *   r8, lr  scratch
 * r1, r2 and ip stay live across the indirect call, so this routine
 * relies on both callees leaving them untouched.  r10 is saved but not
 * otherwise used here; with it the frame is 8 words.
 *
 * The counter previously ran 8 down to 1 with the plane picked by bit 2
 * of that value, which sent only the first block to dest[0], the next
 * four to dest[1] and the last three back to dest[0].
 */
function ff_h264_idct_add8_neon, export=1
        push            {r4-r10,lr}
        ldm             r0,  {r4,r9}            @ r4 = dest[0], r9 = dest[1]
        add             r5,  r1,  #16*4         @ 16 words into block_offset
        add             r1,  r2,  #16*32        @ 16 blocks of 32 bytes into block
        mov             r2,  r3
        ldr             r6,  [sp, #32]          @ fifth argument, just above the 8 saved words
        movw            r7,  #:lower16:scan8+16
        movt            r7,  #:upper16:scan8+16
        mov             ip,  #7
1:      ldrb            r8,  [r7], #1           @ scan8[16 + n]
        ldr             r0,  [r5], #4           @ block_offset[16 + n]
        ldrb            r8,  [r6, r8]           @ nnzc[scan8[16 + n]]
        tst             ip,  #4                 @ ip = 7..4 -> first plane, 3..0 -> second plane
        addne           r0,  r0,  r4
        addeq           r0,  r0,  r9
        cmp             r8,  #0
        ldrsh           r8,  [r1]               @ first coefficient; load leaves the flags alone
        adrne           lr,  ff_h264_idct_add_neon
        adreq           lr,  ff_h264_idct_dc_add_neon
        cmpeq           r8,  #0                 @ count was 0: call only if the coefficient is non-zero
        blxne           lr
        subs            ip,  ip,  #1
        add             r1,  r1,  #32           @ next coefficient block (flags untouched)
        bge             1b                      @ leave once ip has gone below zero
        pop             {r4-r10,pc}
        .endfunc
|
||||||
|
|
||||||
|
/*
 * scan8: 24 byte-sized indices, each written as row*8 + column.
 * The idct_add16 routines read entries 0-15 and idct_add8 reads entries
 * 16-23 (addressed as scan8+16); every value is used as a byte offset
 * into the nnzc array those routines receive.
 */
        .section .rodata
scan8:
        @ entries 0-15
        .byte           1*8+4, 1*8+5, 2*8+4, 2*8+5
        .byte           1*8+6, 1*8+7, 2*8+6, 2*8+7
        .byte           3*8+4, 3*8+5, 4*8+4, 4*8+5
        .byte           3*8+6, 3*8+7, 4*8+6, 4*8+7
        @ entries 16-23
        .byte           1*8+1, 1*8+2, 2*8+1, 2*8+2
        .byte           4*8+1, 4*8+2, 5*8+1, 5*8+2
|
||||||
|
Loading…
Reference in New Issue
Block a user