ADI Blackfin optimizations

patch by Michael Benjamin, neuroptik gmail com

Originally committed as revision 6282 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Benjamin 2006-09-16 22:26:09 +00:00 committed by Diego Biurrun
parent cab3ef828a
commit 5ca030fed4
4 changed files with 64 additions and 0 deletions

View File

@ -357,6 +357,7 @@ OBJS-$(TARGET_ALTIVEC) += ppc/dsputil_altivec.o \
ppc/snow_altivec.o \
ppc/vc1dsp_altivec.o \
ppc/float_altivec.o
OBJS-$(TARGET_ARCH_BFIN) += bfin/dsputil_bfin.o
CFLAGS += $(CFLAGS-yes)
OBJS += $(OBJS-yes)

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2006 Michael Benjamin
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "../avcodec.h"
#include "../dsputil.h"
static int sad8x8_bfin( void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h )
{
int sum;
__asm__ __volatile__ (
"P0 = %1;" // blk1
"P1 = %2;" // blk2
"P2 = %3;\n" // h
"I0 = P0;"
"I1 = P1;\n"
"A0 = 0;"
"A1 = 0;\n"
"M0 = P2;\n"
"P3 = 32;\n"
"LSETUP (sad8x8LoopBegin, sad8x8LoopEnd) LC0=P3;\n"
"sad8x8LoopBegin:\n"
" DISALGNEXCPT || R0 = [I0] || R2 = [I1];\n"
" DISALGNEXCPT || R1 = [I0++] || R3 = [I1++];\n"
"sad8x8LoopEnd:\n"
" SAA ( R1:0 , R3:2 );\n"
"R3 = A1.L + A1.H, R2 = A0.L + A0.H;\n"
"%0 = R2 + R3 (S);\n"
: "=&d" (sum)
: "m"(blk1), "m"(blk2), "m"(h)
: "P0","P1","P2","I0","I1","A0","A1","R0","R1","R2","R3");
return sum;
}
/**
 * Install the Blackfin routines into a DSPContext.
 *
 * Points both c->sad[1] and c->pix_abs[1][0] at sad8x8_bfin.
 *
 * @param c     context whose function-pointer slots are overwritten
 * @param avctx not used by this function
 */
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
    int (*const sad_fn)( void *, uint8_t *, uint8_t *, int, int ) = sad8x8_bfin;

    c->sad[1]        = sad_fn;
    c->pix_abs[1][0] = sad_fn;
}

View File

@ -4189,6 +4189,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
#ifdef ARCH_SH4
dsputil_init_sh4(c,avctx);
#endif
#ifdef ARCH_BFIN
dsputil_init_bfin(c,avctx);
#endif
switch(c->idct_permutation_type){
case FF_NO_IDCT_PERM:

View File

@ -571,6 +571,13 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
#elif defined(ARCH_BFIN)
#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8)))
#define STRIDE_ALIGN 8
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
#else
#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8)))