libavutil: Add ARM av_clip_intp2_arm

Add ARM code that implements av_clip_intp2 using the ssat instruction.

On Cortex-A8, av_clip_intp2_arm() is faster than both av_clip_intp2_c() and
the generic av_clip() (about -19%).

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
This commit is contained in:
Peter Meerwald 2015-02-20 01:35:34 +01:00 committed by Luca Barbato
parent bf07d813f6
commit 76ce9bd8e2
1 changed files with 8 additions and 0 deletions

View File

@@ -62,6 +62,14 @@ static av_always_inline av_const int av_clip_int16_arm(int a)
return x;
}
#define av_clip_intp2 av_clip_intp2_arm
/**
 * Clip a signed integer to the signed (p + 1)-bit range, that is
 * [-2^p, 2^p - 1], with a single ARM SSAT instruction.
 *
 * @param a value to clip
 * @param p power-of-two exponent of the range; p + 1 is handed to the
 *          assembler through an "i" (immediate) constraint, so it must
 *          fold to a compile-time constant at every call site once this
 *          function has been inlined
 * @return  a saturated to the range above
 */
static av_always_inline av_const int av_clip_intp2_arm(int a, int p)
{
    /* SSAT writes a signed result; receive it in an int so that returning
     * a negative value does not go through an implementation-defined
     * unsigned-to-int conversion. */
    int x;
    __asm__ ("ssat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p+1));
    return x;
}
#define av_clip_uintp2 av_clip_uintp2_arm
static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p)
{