ARMv6 optimised pix_abs16_x2

Originally committed as revision 21698 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Måns Rullgård 2010-02-09 16:13:29 +00:00
parent e6056a9008
commit 39a760f678
2 changed files with 58 additions and 0 deletions

View File

@ -315,3 +315,58 @@ function ff_pix_abs16_armv6, export=1
add r0, r12, lr
pop {r4-r9, pc}
.endfunc
function ff_pix_abs16_x2_armv6, export=1
ldr r12, [sp]
push {r4-r11, lr}
mov r0, #0
mov lr, #1
orr lr, lr, lr, lsl #8
orr lr, lr, lr, lsl #16
1:
ldr r8, [r2]
ldr r9, [r2, #4]
lsr r10, r8, #8
ldr r4, [r1]
lsr r6, r9, #8
orr r10, r10, r9, lsl #24
ldr r5, [r2, #8]
eor r11, r8, r10
uhadd8 r7, r8, r10
orr r6, r6, r5, lsl #24
and r11, r11, lr
uadd8 r7, r7, r11
ldr r8, [r1, #4]
usada8 r0, r4, r7, r0
eor r7, r9, r6
lsr r10, r5, #8
and r7, r7, lr
uhadd8 r4, r9, r6
ldr r6, [r2, #12]
uadd8 r4, r4, r7
pld [r1, r3]
orr r10, r10, r6, lsl #24
usada8 r0, r8, r4, r0
ldr r4, [r1, #8]
eor r11, r5, r10
ldrb r7, [r2, #16]
and r11, r11, lr
uhadd8 r8, r5, r10
ldr r5, [r1, #12]
uadd8 r8, r8, r11
pld [r2, r3]
lsr r10, r6, #8
usada8 r0, r4, r8, r0
orr r10, r10, r7, lsl #24
subs r12, r12, #1
eor r11, r6, r10
add r1, r1, r3
uhadd8 r9, r6, r10
and r11, r11, lr
uadd8 r9, r9, r11
add r2, r2, r3
usada8 r0, r5, r9, r0
bgt 1b
pop {r4-r11, pc}
.endfunc

View File

@ -52,6 +52,8 @@ void ff_add_pixels_clamped_armv6(const DCTELEM *block,
int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
int line_size, int h);
int ff_pix_abs16_x2_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
int line_size, int h);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
@ -87,6 +89,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
c->pix_abs[0][0] = ff_pix_abs16_armv6;
c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
c->sad[0] = ff_pix_abs16_armv6;
}