/* ffmpeg/libavcodec/ppc/idct_altivec.c */

/*
 * Copyright (c) 2001 Michel Lespinasse
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * NOTE: This code is based on GPL code from the libmpeg2 project.  The
 * author, Michel Lespinasse, has given explicit permission to release
 * under LGPL as part of ffmpeg.
 */

/*
 * FFmpeg integration by Dieter Shirley
 *
 * This file is a direct copy of the altivec idct module from the libmpeg2
 * project.  I've deleted all of the libmpeg2 specific code, renamed the functions and
 * re-ordered the function parameters.  The only change to the IDCT function
 * itself was to factor out the partial transposition, and to perform a full
 * transpose at the end of the function.
 */


#include <stdlib.h>                                      /* malloc(), free() */
#include <string.h>
#include "libavcodec/dsputil.h"

#include "gcc_fixes.h"
#include "types_altivec.h"
#include "dsputil_ppc.h"

/*
 * IDCT_HALF: one 1-D pass of the 8-point inverse DCT butterfly, applied to
 * all eight 16-bit lanes of each vector at once.
 *
 * Inputs  (read):    vx0..vx7, the multipliers a0, a1, a2, ma2, c4, mc4 and
 *                    the all-zero vector `zero`.
 * Outputs (written): vy0..vy7.
 * Scratch:           t0..t8.  Several of them are overwritten and reused
 *                    from one stage to the next, so the statement order
 *                    below must not be changed.
 *
 * Every name above has to be declared in the expanding scope already; the
 * IDCT macro takes care of that.  All arithmetic uses the saturating AltiVec
 * intrinsics vec_adds, vec_subs and vec_mradds (multiply, round, add).
 * Negation is written as vec_subs (zero, x).
 *
 * The expansion is a bare list of statements, so it must not be used as the
 * unbraced body of an if/else/loop.
 */
#define IDCT_HALF                                       \
    /* 1st stage */                                     \
    t1 = vec_mradds (a1, vx7, vx1 );                    \
    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));    \
    t7 = vec_mradds (a2, vx5, vx3);                     \
    t3 = vec_mradds (ma2, vx3, vx5);                    \
                                                        \
    /* 2nd stage */                                     \
    t5 = vec_adds (vx0, vx4);                           \
    t0 = vec_subs (vx0, vx4);                           \
    t2 = vec_mradds (a0, vx6, vx2);                     \
    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));    \
    t6 = vec_adds (t8, t3);                             \
    t3 = vec_subs (t8, t3);                             \
    t8 = vec_subs (t1, t7);                             \
    t1 = vec_adds (t1, t7);                             \
                                                        \
    /* 3rd stage */                                     \
    t7 = vec_adds (t5, t2);                             \
    t2 = vec_subs (t5, t2);                             \
    t5 = vec_adds (t0, t4);                             \
    t0 = vec_subs (t0, t4);                             \
    t4 = vec_subs (t8, t3);                             \
    t3 = vec_adds (t8, t3);                             \
                                                        \
    /* 4th stage */                                     \
    vy0 = vec_adds (t7, t1);                            \
    vy7 = vec_subs (t7, t1);                            \
    vy1 = vec_mradds (c4, t3, t5);                      \
    vy6 = vec_mradds (mc4, t3, t5);                     \
    vy2 = vec_mradds (c4, t4, t0);                      \
    vy5 = vec_mradds (mc4, t4, t0);                     \
    vy3 = vec_adds (t2, t6);                            \
    vy4 = vec_subs (t2, t6);


/*
 * IDCT: full 8x8 inverse DCT on AltiVec vectors.
 *
 * The expansion starts with variable declarations and continues with
 * statements, so it has to be placed inside a function body at a point where
 * declarations are accepted.  The expanding scope must provide:
 *   - block:     indexable as block[0] .. block[7], each element being one
 *                vector of eight 16-bit coefficients (only read here);
 *   - constants: the static table defined in this file.
 *
 * Steps:
 *   1. Splat the scalar multipliers out of lanes 0-5 of constants[0], and
 *      build `bias` by viewing constants[0] as 32-bit lanes and splatting
 *      lane 3, i.e. the pair of 16-bit values stored in lanes 6 and 7.
 *   2. Prescale: shift every input row left by 4 and multiply it by its row
 *      of constants[1..4] with vec_mradds (addend `zero`).
 *   3. IDCT_HALF: first 1-D pass, vx0..vx7 -> vy0..vy7.
 *   4. Three rounds of vec_mergeh/vec_mergel that together transpose the
 *      8x8 matrix, ending in vx0..vx7.  `bias` is added to vx0 only, in the
 *      last round (presumably the rounding term for the final shift --
 *      TODO confirm).
 *   5. IDCT_HALF: second 1-D pass.
 *   6. Arithmetic shift right by 6 of vy0..vy7 into vx0..vx7.
 *
 * On exit the eight result rows are in vx0..vx7 and `zero` still holds an
 * all-zero vector; idct_put_altivec and idct_add_altivec use both.
 */
#define IDCT                                                            \
    vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;                \
    vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;                \
    vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias;                  \
    vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8;                    \
    vec_u16 shift;                                                 \
                                                                        \
    c4 = vec_splat (constants[0], 0);                                   \
    a0 = vec_splat (constants[0], 1);                                   \
    a1 = vec_splat (constants[0], 2);                                   \
    a2 = vec_splat (constants[0], 3);                                   \
    mc4 = vec_splat (constants[0], 4);                                  \
    ma2 = vec_splat (constants[0], 5);                                  \
    bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3);     \
                                                                        \
    zero = vec_splat_s16 (0);                                           \
    shift = vec_splat_u16 (4);                                          \
                                                                        \
    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);    \
    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);    \
    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);    \
    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);    \
    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);    \
    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);    \
    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);    \
    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);    \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    vx0 = vec_mergeh (vy0, vy4);                                        \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    vy0 = vec_mergeh (vx0, vx4);                                        \
    vy1 = vec_mergel (vx0, vx4);                                        \
    vy2 = vec_mergeh (vx1, vx5);                                        \
    vy3 = vec_mergel (vx1, vx5);                                        \
    vy4 = vec_mergeh (vx2, vx6);                                        \
    vy5 = vec_mergel (vx2, vx6);                                        \
    vy6 = vec_mergeh (vx3, vx7);                                        \
    vy7 = vec_mergel (vx3, vx7);                                        \
                                                                        \
    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);                       \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    shift = vec_splat_u16 (6);                                          \
    vx0 = vec_sra (vy0, shift);                                         \
    vx1 = vec_sra (vy1, shift);                                         \
    vx2 = vec_sra (vy2, shift);                                         \
    vx3 = vec_sra (vy3, shift);                                         \
    vx4 = vec_sra (vy4, shift);                                         \
    vx5 = vec_sra (vy5, shift);                                         \
    vx6 = vec_sra (vy6, shift);                                         \
    vx7 = vec_sra (vy7, shift);


/*
 * Constant table read by the IDCT macro.
 *
 * Row 0 holds scalars that IDCT splats across a whole vector, by lane:
 *   0: c4    1: a0    2: a1    3: a2    4: mc4 (= -c4)    5: ma2 (= -a2)
 *   6-7: {32, 31}, read together as a single 32-bit lane to form `bias`.
 *
 * Rows 1-4 are the per-lane prescale multipliers for the input rows:
 *   row 1 -> block[0], block[4]      row 2 -> block[1], block[7]
 *   row 3 -> block[2], block[6]      row 4 -> block[3], block[5]
 */
static const vec_s16 constants[5] = {
    {23170, 13573,  6518, 21895, -23170, -21895,    32,    31},
    {16384, 22725, 21407, 19266,  16384,  19266, 21407, 22725},
    {22725, 31521, 29692, 26722,  22725,  26722, 29692, 31521},
    {21407, 29692, 27969, 25172,  21407,  25172, 27969, 29692},
    {19266, 26722, 25172, 22654,  19266,  22654, 25172, 26722}
};

/**
 * Inverse-transform one 8x8 coefficient block and overwrite the destination
 * pixels with the result.
 *
 * @param dest   first byte of the top output row; 8 bytes are written per row
 * @param stride distance in bytes between the starts of consecutive rows
 * @param block  eight vectors of 16-bit coefficients, read by the IDCT macro
 *
 * NOTE(review): each row is written with two 4-byte vec_ste stores, which
 * pick the vector element from the address itself.  That only lands the
 * right bytes if every row start is 8-byte aligned -- confirm against the
 * callers.
 */
void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block)
{
POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
    vec_u8 packed;

#ifdef CONFIG_POWERPC_PERF
POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
#endif
    /* Declares the working vectors and leaves the output rows in vx0..vx7. */
    IDCT

/*
 * Saturate one row of 16-bit results to unsigned bytes and store it.
 * Packing the row with itself puts the same 8 bytes in both halves of the
 * vector, so the two word stores find the row data in either half.
 */
#define COPY(dest,src)                                          \
    do {                                                        \
        packed = vec_packsu ((src), (src));                     \
        vec_ste ((vec_u32)packed, 0, (unsigned int *)(dest));   \
        vec_ste ((vec_u32)packed, 4, (unsigned int *)(dest));   \
    } while (0)

    COPY (dest, vx0);
    dest += stride;
    COPY (dest, vx1);
    dest += stride;
    COPY (dest, vx2);
    dest += stride;
    COPY (dest, vx3);
    dest += stride;
    COPY (dest, vx4);
    dest += stride;
    COPY (dest, vx5);
    dest += stride;
    COPY (dest, vx6);
    dest += stride;
    COPY (dest, vx7);

POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
}

/**
 * Inverse-transform one 8x8 coefficient block and add the result to the
 * pixels already present at the destination, saturating to unsigned bytes.
 *
 * @param dest   first byte of the top row; 8 bytes are read and rewritten
 *               per row
 * @param stride distance in bytes between the starts of consecutive rows
 * @param block  eight vectors of 16-bit coefficients, read by the IDCT macro
 *
 * NOTE(review): only two permute masks are built, one from the alignment of
 * dest and one from dest + stride, and they are reused for all even and all
 * odd rows respectively.  This presumes the row alignment repeats every two
 * rows and that each row start is 8-byte aligned -- confirm against the
 * callers.
 */
void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block)
{
POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
    vec_u8 pixels;
    vec_s16 widened, summed;
    vec_u8 perm_even, perm_odd;
    vec_u8 shift_even, shift_odd, all_ones;

#ifdef CONFIG_POWERPC_PERF
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
#endif

    /* Declares the working vectors (including `zero`) and leaves the
     * residual rows in vx0..vx7. */
    IDCT

    /*
     * Build the byte -> 16-bit widening masks.  Interleaving 0xFF with the
     * vec_lvsl indices gives, per output lane, one index that selects a byte
     * of `zero` and one that selects the next pixel of the row.
     */
    all_ones   = vec_splat_u8 (-1);
    shift_even = vec_lvsl (0, dest);
    shift_odd  = vec_lvsl (stride, dest);
    perm_even  = vec_mergeh (all_ones, shift_even);
    perm_odd   = vec_mergeh (all_ones, shift_odd);

/*
 * Load the 16-byte line containing the row, widen its 8 pixels to 16 bits
 * through `perm`, add the residual with saturation, pack back to unsigned
 * bytes and store the 8 bytes as two 4-byte elements.
 */
#define ADD(dest,src,perm)                                              \
    do {                                                                \
        pixels  = vec_ld (0, (dest));                                   \
        widened = (vec_s16)vec_perm (pixels, (vec_u8)zero, (perm));     \
        summed  = vec_adds (widened, (src));                            \
        pixels  = vec_packsu (summed, summed);                          \
        vec_ste ((vec_u32)pixels, 0, (unsigned int *)(dest));           \
        vec_ste ((vec_u32)pixels, 4, (unsigned int *)(dest));           \
    } while (0)

    ADD (dest, vx0, perm_even);
    dest += stride;
    ADD (dest, vx1, perm_odd);
    dest += stride;
    ADD (dest, vx2, perm_even);
    dest += stride;
    ADD (dest, vx3, perm_odd);
    dest += stride;
    ADD (dest, vx4, perm_even);
    dest += stride;
    ADD (dest, vx5, perm_odd);
    dest += stride;
    ADD (dest, vx6, perm_even);
    dest += stride;
    ADD (dest, vx7, perm_odd);

POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
}
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`/*`
			`* Copyright (c) 2001 Michel Lespinasse`
			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 15:30:46 +00:00			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 15:30:46 +00:00			`* version 2.1 of the License, or (at your option) any later version.`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 15:30:46 +00:00			`* FFmpeg is distributed in the hope that it will be useful,`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-10-07 15:30:46 +00:00			`* License along with FFmpeg; if not, write to the Free Software`
Update licensing information: The FSF changed postal address. Originally committed as revision 4842 to svn://svn.ffmpeg.org/ffmpeg/trunk 2006-01-12 22:43:26 +00:00			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`*/`

			`/*`
			`* NOTE: This code is based on GPL code from the libmpeg2 project. The`
			`* author, Michel Lespinasses, has given explicit permission to release`
			`* under LGPL as part of ffmpeg.`
			`*/`

			`/*`
			`* FFMpeg integration by Dieter Shirley`
			`*`
			`* This file is a direct copy of the altivec idct module from the libmpeg2`
			`* project. I've deleted all of the libmpeg2 specific code, renamed the functions and`
			`* re-ordered the function parameters. The only change to the IDCT function`
			`* itself was to factor out the partial transposition, and to perform a full`
			`* transpose at the end of the function.`
			`*/`


			`#include <stdlib.h> /* malloc(), free() */`
			`#include <string.h>`
Use full path for #includes from another directory. Originally committed as revision 13098 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-05-09 11:56:36 +00:00			`#include "libavcodec/dsputil.h"`
altivec gcc-3 fixes by (Magnus Damm <damm at opensource dot se>) Originally committed as revision 1896 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-05-21 17:50:57 +00:00
			`#include "gcc_fixes.h"`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`#include "types_altivec.h"`
Change some files to only include the necessary headers. Originally committed as revision 11394 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-01-04 13:38:25 +00:00			`#include "dsputil_ppc.h"`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`#define IDCT_HALF \`
			`/* 1st stage */ \`
			`t1 = vec_mradds (a1, vx7, vx1 ); \`
			`t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \`
			`t7 = vec_mradds (a2, vx5, vx3); \`
			`t3 = vec_mradds (ma2, vx3, vx5); \`
			`\`
			`/* 2nd stage */ \`
			`t5 = vec_adds (vx0, vx4); \`
			`t0 = vec_subs (vx0, vx4); \`
			`t2 = vec_mradds (a0, vx6, vx2); \`
			`t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \`
			`t6 = vec_adds (t8, t3); \`
			`t3 = vec_subs (t8, t3); \`
			`t8 = vec_subs (t1, t7); \`
			`t1 = vec_adds (t1, t7); \`
			`\`
			`/* 3rd stage */ \`
			`t7 = vec_adds (t5, t2); \`
			`t2 = vec_subs (t5, t2); \`
			`t5 = vec_adds (t0, t4); \`
			`t0 = vec_subs (t0, t4); \`
			`t4 = vec_subs (t8, t3); \`
			`t3 = vec_adds (t8, t3); \`
			`\`
			`/* 4th stage */ \`
			`vy0 = vec_adds (t7, t1); \`
			`vy7 = vec_subs (t7, t1); \`
			`vy1 = vec_mradds (c4, t3, t5); \`
			`vy6 = vec_mradds (mc4, t3, t5); \`
			`vy2 = vec_mradds (c4, t4, t0); \`
			`vy5 = vec_mradds (mc4, t4, t0); \`
			`vy3 = vec_adds (t2, t6); \`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`vy4 = vec_subs (t2, t6);`

COSMETICS: Remove all trailing whitespace. Originally committed as revision 4749 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-17 18:14:38 +00:00
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`#define IDCT \`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \`
			`vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \`
			`vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \`
			`vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \`
			`vec_u16 shift; \`
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`\`
			`c4 = vec_splat (constants[0], 0); \`
			`a0 = vec_splat (constants[0], 1); \`
			`a1 = vec_splat (constants[0], 2); \`
			`a2 = vec_splat (constants[0], 3); \`
			`mc4 = vec_splat (constants[0], 4); \`
			`ma2 = vec_splat (constants[0], 5); \`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3); \`
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`\`
			`zero = vec_splat_s16 (0); \`
			`shift = vec_splat_u16 (4); \`
			`\`
			`vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \`
			`vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \`
			`vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \`
			`vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \`
			`vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \`
			`vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \`
			`vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \`
			`vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \`
			`\`
			`IDCT_HALF \`
			`\`
			`vx0 = vec_mergeh (vy0, vy4); \`
			`vx1 = vec_mergel (vy0, vy4); \`
			`vx2 = vec_mergeh (vy1, vy5); \`
			`vx3 = vec_mergel (vy1, vy5); \`
			`vx4 = vec_mergeh (vy2, vy6); \`
			`vx5 = vec_mergel (vy2, vy6); \`
			`vx6 = vec_mergeh (vy3, vy7); \`
			`vx7 = vec_mergel (vy3, vy7); \`
			`\`
			`vy0 = vec_mergeh (vx0, vx4); \`
			`vy1 = vec_mergel (vx0, vx4); \`
			`vy2 = vec_mergeh (vx1, vx5); \`
			`vy3 = vec_mergel (vx1, vx5); \`
			`vy4 = vec_mergeh (vx2, vx6); \`
			`vy5 = vec_mergel (vx2, vx6); \`
			`vy6 = vec_mergeh (vx3, vx7); \`
			`vy7 = vec_mergel (vx3, vx7); \`
			`\`
			`vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \`
			`vx1 = vec_mergel (vy0, vy4); \`
			`vx2 = vec_mergeh (vy1, vy5); \`
			`vx3 = vec_mergel (vy1, vy5); \`
			`vx4 = vec_mergeh (vy2, vy6); \`
			`vx5 = vec_mergel (vy2, vy6); \`
			`vx6 = vec_mergeh (vy3, vy7); \`
			`vx7 = vec_mergel (vy3, vy7); \`
			`\`
			`IDCT_HALF \`
			`\`
			`shift = vec_splat_u16 (6); \`
			`vx0 = vec_sra (vy0, shift); \`
			`vx1 = vec_sra (vy1, shift); \`
			`vx2 = vec_sra (vy2, shift); \`
			`vx3 = vec_sra (vy3, shift); \`
			`vx4 = vec_sra (vy4, shift); \`
			`vx5 = vec_sra (vy5, shift); \`
			`vx6 = vec_sra (vy6, shift); \`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`vx7 = vec_sra (vy7, shift);`

Altivec on non darwin systems patch by Romain Dolbeau Originally committed as revision 1509 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-01-26 22:29:47 +00:00
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`static const vec_s16 constants[5] = {`
Remove AltiVec vector declaration compiler compatibility macros. The original problem was that FSF and Apple gcc used a different syntax for vector declarations, i.e. {} vs. (). Nowadays Apple gcc versions support the standard {} syntax and versions that support {} are available on all relevant Mac OS X versions. Thus the greater compatibility is no longer worth cluttering the code with macros. Originally committed as revision 14366 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-07-24 10:53:32 +00:00			`{23170, 13573, 6518, 21895, -23170, -21895, 32, 31},`
			`{16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725},`
			`{22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521},`
			`{21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692},`
			`{19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722}`
Altivec on non darwin systems patch by Romain Dolbeau Originally committed as revision 1509 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-01-26 22:29:47 +00:00			`};`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block)`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`{`
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`vec_u8 tmp;`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF Originally committed as revision 7968 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-02-13 23:45:28 +00:00			`#ifdef CONFIG_POWERPC_PERF`
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);`
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>) Originally committed as revision 2823 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-02-28 15:03:53 +00:00			`#endif`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`IDCT`

COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`#define COPY(dest,src) \`
			`tmp = vec_packsu (src, src); \`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \`
			`vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`COPY (dest, vx0) dest += stride;`
			`COPY (dest, vx1) dest += stride;`
			`COPY (dest, vx2) dest += stride;`
			`COPY (dest, vx3) dest += stride;`
			`COPY (dest, vx4) dest += stride;`
			`COPY (dest, vx5) dest += stride;`
			`COPY (dest, vx6) dest += stride;`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`COPY (dest, vx7)`
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 1464 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-01-16 21:54:55 +00:00
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`}`

Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block)`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`{`
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`vec_u8 tmp;`
			`vec_s16 tmp2, tmp3;`
			`vec_u8 perm0;`
			`vec_u8 perm1;`
			`vec_u8 p0, p1, p;`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
rename POWERPC_PERFORMANCE_REPORT to CONFIG_POWERPC_PERF Originally committed as revision 7968 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-02-13 23:45:28 +00:00			`#ifdef CONFIG_POWERPC_PERF`
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);`
Metrowerks CodeWarrior patches by (John Dalgliesh <johnd at defyne dot org>) Originally committed as revision 2823 to svn://svn.ffmpeg.org/ffmpeg/trunk 2004-02-28 15:03:53 +00:00			`#endif`
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 1464 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-01-16 21:54:55 +00:00
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`IDCT`

			`p0 = vec_lvsl (0, dest);`
			`p1 = vec_lvsl (stride, dest);`
			`p = vec_splat_u8 (-1);`
			`perm0 = vec_mergeh (p, p0);`
			`perm1 = vec_mergeh (p, p1);`

COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`#define ADD(dest,src,perm) \`
			`/* (uint64_t )&tmp = (uint64_t )dest; */ \`
			`tmp = vec_ld (0, dest); \`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm); \`
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`tmp3 = vec_adds (tmp2, src); \`
			`tmp = vec_packsu (tmp3, tmp3); \`
Cleanup _t types in libavcodec/ppc Originally committed as revision 16357 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-27 11:21:28 +00:00			`vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \`
			`vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00
COSMETICS: tabs --> spaces, some prettyprinting Originally committed as revision 4764 to svn://svn.ffmpeg.org/ffmpeg/trunk 2005-12-22 01:10:11 +00:00			`ADD (dest, vx0, perm0) dest += stride;`
			`ADD (dest, vx1, perm1) dest += stride;`
			`ADD (dest, vx2, perm0) dest += stride;`
			`ADD (dest, vx3, perm1) dest += stride;`
			`ADD (dest, vx4, perm0) dest += stride;`
			`ADD (dest, vx5, perm1) dest += stride;`
			`ADD (dest, vx6, perm0) dest += stride;`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`ADD (dest, vx7, perm1)`
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 1464 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-01-16 21:54:55 +00:00
1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>) Originally committed as revision 2022 to svn://svn.ffmpeg.org/ffmpeg/trunk 2003-07-09 20:18:13 +00:00			`POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);`
Altivec Patch (Mark III) by (Dieter Shirley <dieters at schemasoft dot com>) Originally committed as revision 1147 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-11-02 11:28:08 +00:00			`}`