ffmpeg/libavcodec/dsputil.h

/*
 * DSP utils
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#ifndef DSPUTIL_H
#define DSPUTIL_H

#include "common.h"
#include "avcodec.h"

//#define DEBUG
/* dct code */
/* Element type of a DCT coefficient block (a plain short). */
typedef short DCTELEM;

/* NOTE(review): by name a fast forward DCT; the signature only shows that it
   takes a mutable DCTELEM buffer -- confirm block size and in-place
   behaviour in the implementation. */
void jpeg_fdct_ifast (DCTELEM *data);

/* NOTE(review): by name a reverse (inverse) DCT on a mutable DCTELEM
   buffer -- confirm against the implementation. */
void j_rev_dct (DCTELEM *data);

/* NOTE(review): presumably the MMX forward DCT. It is declared
   unconditionally, outside the HAVE_MMX section further down. */
void fdct_mmx(DCTELEM *block);

/* Pointer to the forward DCT routine.
   NOTE(review): unlike the other function pointers in this header, this one
   is not declared extern, so every translation unit including the header
   gets its own tentative definition. That only links when the toolchain
   merges common symbols (e.g. -fcommon). Consider `extern` here plus a
   single definition in one .c file. */
void (*av_fdct)(DCTELEM *block);

/* encoding scans: three 64-entry byte tables, defined elsewhere */
extern UINT8 ff_alternate_horizontal_scan[64];
extern UINT8 ff_alternate_vertical_scan[64];
extern UINT8 zigzag_direct[64];

/* permutation table: 64 byte entries, defined elsewhere; indexed by
   block_permute_op() below */
extern UINT8 permutation[64];

/* pixel operations */
/* Margin constant: cropTbl below is sized with 2 * MAX_NEG_CROP extra
   entries on top of 256. */
#define MAX_NEG_CROP 384

/* temporary */
/* 512 entries of 32-bit values, defined elsewhere.
   NOTE(review): presumably a table of squares -- confirm where it is
   filled. */
extern UINT32 squareTbl[512];
/* 256 + 2 * MAX_NEG_CROP = 1024 byte entries, defined elsewhere.
   NOTE(review): presumably a clamping lookup with MAX_NEG_CROP entries of
   headroom on each side of the 0..255 range -- confirm where it is
   filled. */
extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];

/* Module initialisation entry point; takes no arguments, returns nothing.
   NOTE(review): presumably sets up the tables and function pointers declared
   in this header -- confirm in the implementation. */
void dsputil_init(void);

/* pixel ops : interface with DCT */

/* All of the following are function pointers defined elsewhere (extern).
   NOTE(review): presumably assigned during initialisation to either the
   plain C routine or an architecture-specific one -- confirm. */

/* Operates on a mutable coefficient block. */
extern void (*ff_idct)(DCTELEM *block);
/* Writes to block; pixels is only read (const). line_size is passed
   alongside the pixel pointer (presumably the row stride -- confirm). */
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
/* Writes to block; both pixel sources s1 and s2 are only read (const) and
   share a single stride argument. */
extern void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
/* block is only read (const); pixels is the writable destination. */
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
/* block is only read (const); pixels is the writable destination. */
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
/* NOTE(review): by name a one-point global motion compensation step; x16
   and y16 are presumably fractional offsets in 1/16 units -- confirm. */
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
/* Takes a mutable pointer to coefficient storage; the number of blocks
   covered is not visible from the signature. */
extern void (*clear_blocks)(DCTELEM *blocks);


/* Plain functions with the `_c` suffix. Each has exactly the same parameter
   list as the function pointer of the same base name declared above
   (get_pixels, diff_pixels, put_pixels_clamped, add_pixels_clamped,
   clear_blocks), so it can be assigned to that pointer directly. */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);

/* add and put pixel (decoding) */
/* Pixel operation: writes to block, reads from pixels (const), with one
   line_size and a height h. */
typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
/* Quarter-pel motion compensation routine with separate destination and
   source strides. NOTE(review): mx/my are presumably the sub-pel motion
   vector components -- confirm. */
typedef void (*qpel_mc_func)(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my);

/* Dispatch tables defined elsewhere: four entries per op_pixels_func table,
   sixteen per qpel_mc_func table.
   NOTE(review): the index is presumably derived from the half-pel /
   quarter-pel position -- confirm at the call sites. */
extern op_pixels_func put_pixels_tab[4];
extern op_pixels_func avg_pixels_tab[4];
extern op_pixels_func put_no_rnd_pixels_tab[4];
extern op_pixels_func avg_no_rnd_pixels_tab[4];
extern qpel_mc_func qpel_mc_rnd_tab[16];
extern qpel_mc_func qpel_mc_no_rnd_tab[16];

/* motion estimation */

/* Comparison routine over two pixel blocks sharing one line_size; returns
   an int. NOTE(review): by name an absolute-difference sum -- confirm. */
typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size);

/* Function pointers defined elsewhere, one per block size (16x16, 8x8) and
   variant (plain, _x2, _y2, _xy2).
   NOTE(review): the suffixes presumably denote half-pel interpolation in
   x, y, or both -- confirm. */
extern op_pixels_abs_func pix_abs16x16;
extern op_pixels_abs_func pix_abs16x16_x2;
extern op_pixels_abs_func pix_abs16x16_y2;
extern op_pixels_abs_func pix_abs16x16_xy2;
extern op_pixels_abs_func pix_abs8x8;
extern op_pixels_abs_func pix_abs8x8_x2;
extern op_pixels_abs_func pix_abs8x8_y2;
extern op_pixels_abs_func pix_abs8x8_xy2;

/* `_c` functions with a parameter list compatible with op_pixels_abs_func.
   Only the 16x16 variants are declared here; no 8x8 `_c` prototypes appear
   in this header. */
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);

/* Look up index j in the global permutation[] table and return the stored
   entry widened to int. No range check is performed on j; the table has
   64 entries. */
static inline int block_permute_op(int j)
{
    const int permuted = permutation[j];

    return permuted;
}

/* Takes a mutable INT16 block.
   NOTE(review): presumably reorders the block in place according to
   permutation[] -- confirm in the implementation. */
void block_permute(INT16 *block);

#if defined(HAVE_MMX)

/* CPU capability bits. Each value is a distinct single bit, so they can be
   OR-ed together and tested with `&` (as emms_c() does with MM_MMX).
   They are listed here out of numeric order. */
#define MM_MMX    0x0001 /* standard MMX */
#define MM_3DNOW  0x0004 /* AMD 3DNOW */
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_SSE    0x0008 /* SSE functions */
#define MM_SSE2   0x0010 /* PIV SSE2 functions */

/* Capability bitmask, defined elsewhere; tested against the MM_* bits. */
extern int mm_flags;

/* NOTE(review): presumably probes the CPU and returns a mask of MM_* bits
   -- confirm in the implementation. */
int mm_support(void);

/* Execute the x86 `emms` instruction.
   The asm statement is marked volatile and lists "memory" as clobbered, so
   the compiler must keep it and must not cache memory values across it. */
static inline void emms(void)
{
    __asm __volatile ("emms;":::"memory");
}

/*
 * emms_c(): call emms() from C code, but only when the runtime capability
 * mask mm_flags has the MM_MMX bit set.
 *
 * The body is wrapped in do { } while (0) so that `emms_c();` expands to
 * exactly one statement. A bare `{ ... }` expansion followed by the caller's
 * `;` yields two statements, which breaks `if (x) emms_c(); else ...`.
 * Callers must terminate the invocation with a semicolon.
 */
#define emms_c() \
do {\
    if (mm_flags & MM_MMX)\
        emms();\
} while (0)

/* Alignment qualifier: in this (HAVE_MMX) branch it requests 8-byte
   alignment through the GCC `aligned` attribute. */
#define __align8 __attribute__ ((aligned (8)))

/* MMX-specific entry points; declared only when HAVE_MMX is defined.
   NOTE(review): dsputil_set_bit_exact_mmx presumably switches to bit-exact
   routines for testing -- confirm in the implementation. */
void dsputil_init_mmx(void);
void dsputil_set_bit_exact_mmx(void);

#elif defined(ARCH_ARMV4L)

/* In this branch emms_c() expands to nothing. */
#define emms_c()

/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
   line optimizations.
   Despite its name, __align8 requests only 4-byte alignment in this
   (ARCH_ARMV4L) branch. */
#define __align8 __attribute__ ((aligned (4)))

/* ARMv4L-specific initialisation entry point. */
void dsputil_init_armv4l(void);   

#elif defined(HAVE_MLIB)
 
/* In this branch emms_c() expands to nothing. */
#define emms_c()

/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))

/* mlib-specific initialisation entry point (HAVE_MLIB builds only). */
void dsputil_init_mlib(void);   

#elif defined(ARCH_ALPHA)

/* In this branch emms_c() expands to nothing. */
#define emms_c()
/* 8-byte alignment through the GCC `aligned` attribute. */
#define __align8 __attribute__ ((aligned (8)))

/* Alpha-specific initialisation entry point (ARCH_ALPHA builds only). */
void dsputil_init_alpha(void);

#else

/* Generic fallback: emms_c() expands to nothing ... */
#define emms_c()

/* ... and __align8 expands to nothing, so no alignment is requested. */
#define __align8

#endif

/* PSNR */
/* Takes three plane pointers for the original and for the coded image,
   plus the codec context. The original image has one line size per plane
   (orig_linesize[3]) while the coded image has a single scalar
   coded_linesize. Returns nothing.
   NOTE(review): the result is presumably stored through avctx -- confirm
   in the implementation. */
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
              
#endif
license/copyright change Originally committed as revision 599 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-05-25 22:45:33 +00:00			`/*`
			`* DSP utils`
			`* Copyright (c) 2000, 2001, 2002 Fabrice Bellard.`
			`*`
			`* This library is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2 of the License, or (at your option) any later version.`
			`*`
			`* This library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with this library; if not, write to the Free Software`
			`* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
			`*/`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`#ifndef DSPUTIL_H`
			`#define DSPUTIL_H`

			`#include "common.h"`
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm sure it works ok. Also it's slow, so use it only when you _really_ need to measure quality. - Fix libavcodec Makefile to enable profiling. Originally committed as revision 314 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-02-26 22:14:27 +00:00			`#include "avcodec.h"`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
divx5-gmc support q-pel mc support neither is totally bugfree yet though :( Originally committed as revision 320 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-09 13:01:16 +00:00			`//#define DEBUG`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`/* dct code */`
			`typedef short DCTELEM;`

			`void jpeg_fdct_ifast (DCTELEM *data);`

			`void j_rev_dct (DCTELEM *data);`

			`void fdct_mmx(DCTELEM *block);`

			`void (*av_fdct)(DCTELEM *block);`

added block permutation functions Originally committed as revision 45 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-07 22:43:19 +00:00			`/* encoding scans */`
			`extern UINT8 ff_alternate_horizontal_scan[64];`
			`extern UINT8 ff_alternate_vertical_scan[64];`
			`extern UINT8 zigzag_direct[64];`

fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at> Originally committed as revision 250 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-01-10 00:51:45 +00:00			`/* permutation table */`
			`extern UINT8 permutation[64];`

Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`/* pixel operations */`
			`#define MAX_NEG_CROP 384`

			`/* temporary */`
			`extern UINT32 squareTbl[512];`
export for imgconvert usage Originally committed as revision 67 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-11 19:00:43 +00:00			`extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`void dsputil_init(void);`

			`/* pixel ops : interface with DCT */`

MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2 Gives average 13-20% mpeg decoding speedup on x86 systems. Originally committed as revision 30 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-03 18:33:03 +00:00			`extern void (ff_idct)(DCTELEM block);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`extern void (get_pixels)(DCTELEM block, const UINT8 *pixels, int line_size);`
fixed mpeg4 time stuff on encoding mpeg4 b-frame enoding support removed old, out-commented ratecontrol reuse motion compensation code between encoding & decoding prefix newly added global functions with ff_ to reduce namespace polution b-frame ME (unfinished, but working) added some comments to mpegvideo.h do MC on encoding only once if possible bugs? ;) Originally committed as revision 403 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-04-17 04:32:12 +00:00			`extern void (diff_pixels)(DCTELEM block, const UINT8 s1, const UINT8 s2, int stride);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`extern void (put_pixels_clamped)(const DCTELEM block, UINT8 *pixels, int line_size);`
			`extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);`
divx5-gmc support q-pel mc support neither is totally bugfree yet though :( Originally committed as revision 320 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-09 13:01:16 +00:00			`extern void (gmc1)(UINT8 dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);`
sizeof(s->block) isnt 6462 anymore bugfix mpeg12 decoding optimization Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-28 13:41:04 +00:00			`extern void (clear_blocks)(DCTELEM blocks);`
divx5-gmc support q-pel mc support neither is totally bugfree yet though :( Originally committed as revision 320 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-09 13:01:16 +00:00
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);`
fixed mpeg4 time stuff on encoding mpeg4 b-frame enoding support removed old, out-commented ratecontrol reuse motion compensation code between encoding & decoding prefix newly added global functions with ff_ to reduce namespace polution b-frame ME (unfinished, but working) added some comments to mpegvideo.h do MC on encoding only once if possible bugs? ;) Originally committed as revision 403 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-04-17 04:32:12 +00:00			`void diff_pixels_c(DCTELEM block, const UINT8 s1, const UINT8 *s2, int stride);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`void put_pixels_clamped_c(const DCTELEM block, UINT8 pixels, int line_size);`
			`void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);`
sizeof(s->block) isnt 6462 anymore bugfix mpeg12 decoding optimization Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-28 13:41:04 +00:00			`void clear_blocks_c(DCTELEM *blocks);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`/* add and put pixel (decoding) */`
			`typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);`
divx5-gmc support q-pel mc support neither is totally bugfree yet though :( Originally committed as revision 320 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-09 13:01:16 +00:00			`typedef void (qpel_mc_func)(UINT8 dst, UINT8 *src, int dstStride, int srcStride, int mx, int my);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`extern op_pixels_func put_pixels_tab[4];`
			`extern op_pixels_func avg_pixels_tab[4];`
			`extern op_pixels_func put_no_rnd_pixels_tab[4];`
			`extern op_pixels_func avg_no_rnd_pixels_tab[4];`
divx5-gmc support q-pel mc support neither is totally bugfree yet though :( Originally committed as revision 320 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-09 13:01:16 +00:00			`extern qpel_mc_func qpel_mc_rnd_tab[16];`
			`extern qpel_mc_func qpel_mc_no_rnd_tab[16];`

Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`/* motion estimation */`

4MV motion estimation (not finished yet) SAD functions rewritten (8x8 support & MMX2 optimizations) HQ inter/intra decission msmpeg4 encoding bugfix (MV where too long) Originally committed as revision 362 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-27 21:25:22 +00:00			`typedef int (op_pixels_abs_func)(UINT8 blk1, UINT8 *blk2, int line_size);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`extern op_pixels_abs_func pix_abs16x16;`
			`extern op_pixels_abs_func pix_abs16x16_x2;`
			`extern op_pixels_abs_func pix_abs16x16_y2;`
			`extern op_pixels_abs_func pix_abs16x16_xy2;`
4MV motion estimation (not finished yet) SAD functions rewritten (8x8 support & MMX2 optimizations) HQ inter/intra decission msmpeg4 encoding bugfix (MV where too long) Originally committed as revision 362 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-03-27 21:25:22 +00:00			`extern op_pixels_abs_func pix_abs8x8;`
			`extern op_pixels_abs_func pix_abs8x8_x2;`
			`extern op_pixels_abs_func pix_abs8x8_y2;`
			`extern op_pixels_abs_func pix_abs8x8_xy2;`

			`int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);`
			`int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);`
			`int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);`
			`int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
added block permutation functions Originally committed as revision 45 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-07 22:43:19 +00:00			`static inline int block_permute_op(int j)`
			`{`
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at> Originally committed as revision 250 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-01-10 00:51:45 +00:00			`return permutation[j];`
added block permutation functions Originally committed as revision 45 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-07 22:43:19 +00:00			`}`

			`void block_permute(INT16 *block);`

arm optimizations Originally committed as revision 82 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-13 21:45:36 +00:00			`#if defined(HAVE_MMX)`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
			`#define MM_MMX 0x0001 /* standard MMX */`
			`#define MM_3DNOW 0x0004 /* AMD 3DNOW */`
			`#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */`
			`#define MM_SSE 0x0008 /* SSE functions */`
			`#define MM_SSE2 0x0010 /* PIV SSE2 functions */`

			`extern int mm_flags;`

			`int mm_support(void);`

			`static inline void emms(void)`
			`{`
added emms_c() macro which should can used in c code in both mmx/non mmx cases Originally committed as revision 12 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-24 20:42:03 +00:00			`__asm __volatile ("emms;":::"memory");`
			`}`

			`#define emms_c() \`
			`{\`
			`if (mm_flags & MM_MMX)\`
			`emms();\`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`}`

			`#define __align8 __attribute__ ((aligned (8)))`

			`void dsputil_init_mmx(void);`
removed unused stuff - added dsputil_set_bit_exact() support for easier testing Originally committed as revision 551 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-05-20 16:37:58 +00:00			`void dsputil_set_bit_exact_mmx(void);`
Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00
arm optimizations Originally committed as revision 82 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-08-13 21:45:36 +00:00			`#elif defined(ARCH_ARMV4L)`

			`#define emms_c()`

			`/* This is to use 4 bytes read to the IDCT pointers for some 'zero'`
			`line optimizations */`
			`#define __align8 __attribute__ ((aligned (4)))`

			`void dsputil_init_armv4l(void);`

mlib merge Originally committed as revision 131 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-09-16 21:54:00 +00:00			`#elif defined(HAVE_MLIB)`

			`#define emms_c()`

			`/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */`
			`#define __align8 __attribute__ ((aligned (8)))`

			`void dsputil_init_mlib(void);`

Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de> Originally committed as revision 274 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-01-20 14:48:02 +00:00			`#elif defined(ARCH_ALPHA)`

			`#define emms_c()`
			`#define __align8 __attribute__ ((aligned (8)))`

			`void dsputil_init_alpha(void);`

Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`#else`

added emms_c() macro which should can used in c code in both mmx/non mmx cases Originally committed as revision 12 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-24 20:42:03 +00:00			`#define emms_c()`

Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`#define __align8`

			`#endif`

- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm sure it works ok. Also it's slow, so use it only when you _really_ need to measure quality. - Fix libavcodec Makefile to enable profiling. Originally committed as revision 314 to svn://svn.ffmpeg.org/ffmpeg/trunk 2002-02-26 22:14:27 +00:00			`/* PSNR */`
			`void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],`
			`int orig_linesize[3], int coded_linesize,`
			`AVCodecContext *avctx);`

Initial revision Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk 2001-07-22 14:18:56 +00:00			`#endif`