From 7801d21d13dcf442d92614534c312d1e69df2467 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Wed, 23 Oct 2002 15:11:07 +0000 Subject: [PATCH] optimize block_permute() optimize dct_quantize_c() don't permute s->q_inter/intra_matrix Originally committed as revision 1067 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/dsputil.c | 25 +++++++++++++++++-------- libavcodec/dsputil.h | 6 +++++- libavcodec/mpegvideo.c | 18 +++++++++--------- libavcodec/mpegvideo.h | 1 + 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 8fcfd1d020..a8578b5c7b 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -1553,16 +1553,25 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -/* permute block according so that it corresponds to the MMX idct - order */ -void block_permute(INT16 *block, UINT8 *permutation) +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) { - int i; - INT16 temp[64]; + int i; + INT16 temp[64]; + + if(last<=0) return; + if(permutation[1]==1) return; //FIXME it's ok but not clean and might fail for some perms - for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; - - for(i=0; i<64; i++) block[i] = temp[i]; + for(i=0; i<=last; i++){ + const int j= scantable[i]; + temp[j]= block[j]; + block[j]=0; + } + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + const int perm_j= permutation[j]; + block[perm_j]= temp[j]; + } } void clear_blocks_c(DCTELEM *blocks) diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index c3e85c8225..a8285c80b6 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -115,7 +115,11 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); -void block_permute(INT16 *block, UINT8 *permutation); +/** + * permute block according to permutation. 
+ * @param last last non zero element in scantable order + */ +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); #if defined(HAVE_MMX) diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 70bce2db20..e3fce2a85a 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -94,7 +94,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (s->fdct == fdct_ifast) { @@ -105,7 +105,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / + qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { @@ -138,6 +138,8 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ int i; int end; + + st->scantable= src_scantable; for(i=0; i<64; i++){ int j; @@ -2968,18 +2970,13 @@ static int dct_quantize_c(MpegEncContext *s, { int i, j, level, last_non_zero, q; const int *qmat; + const UINT8 *scantable= s->intra_scantable.scantable; int bias; int max=0; unsigned int threshold1, threshold2; s->fdct (block); -#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */ - /* we need this permutation so that we correct the IDCT - permutation. 
will be moved into DCT code */ - block_permute(block, s->idct_permutation); //FIXME remove -#endif - if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) @@ -3007,7 +3004,7 @@ static int dct_quantize_c(MpegEncContext *s, threshold2= (threshold1<<1); for(;i<64;i++) { - j = s->intra_scantable.permutated[i]; + j = scantable[i]; level = block[j]; level = level * qmat[j]; @@ -3029,6 +3026,9 @@ static int dct_quantize_c(MpegEncContext *s, } *overflow= s->max_qcoeff < max; //overflow might have happend + /* we need this permutation so that we correct the IDCT permutation; we only permute the !=0 elements */ + ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); + return last_non_zero; } diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 254e85dac7..dc5af27294 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -100,6 +100,7 @@ typedef struct ReorderBuffer{ } ReorderBuffer; typedef struct ScanTable{ + const UINT8 *scantable; UINT8 permutated[64]; UINT8 raster_end[64]; } ScanTable;