From 649c00c96d7044aed46d70623e47d7434318e6b9 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Thu, 28 Mar 2002 13:41:04 +0000 Subject: [PATCH] sizeof(s->block) isn't 64*6*2 anymore (bugfix); mpeg12 decoding optimization Originally committed as revision 364 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/dsputil.c | 7 +++++++ libavcodec/dsputil.h | 2 ++ libavcodec/h263dec.c | 25 +++---------------------- libavcodec/i386/dsputil_mmx.c | 20 +++++++++++++++++++- libavcodec/mpeg12.c | 2 +- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index d27687d84a..dcfad05a5d 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -30,6 +30,7 @@ void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +void (*clear_blocks)(DCTELEM *blocks); op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16_x2; @@ -866,6 +867,11 @@ void block_permute(INT16 *block) } #endif +void clear_blocks_c(DCTELEM *blocks) +{ + memset(blocks, 0, sizeof(DCTELEM)*6*64); +} + void dsputil_init(void) { int i, j; @@ -890,6 +896,7 @@ void dsputil_init(void) put_pixels_clamped = put_pixels_clamped_c; add_pixels_clamped = add_pixels_clamped_c; gmc1= gmc1_c; + clear_blocks= clear_blocks_c; pix_abs16x16 = pix_abs16x16_c; pix_abs16x16_x2 = pix_abs16x16_x2_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 45c1a695a7..ea6a3d84d2 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -40,11 +40,13 @@ extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 
extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); +extern void (*clear_blocks)(DCTELEM *blocks); void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); +void clear_blocks_c(DCTELEM *blocks); /* add and put pixel (decoding) */ typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index aa822800bd..de9919fdad 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -156,6 +156,7 @@ static int h263_decode_frame(AVCodecContext *avctx, if (s->mb_y && !s->h263_pred) { s->first_gob_line = h263_decode_gob_header(s); } + s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1; s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1); s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1; @@ -183,28 +184,8 @@ static int h263_decode_frame(AVCodecContext *avctx, s->y_dc_scale = 8; s->c_dc_scale = 8; } - -#ifdef HAVE_MMX - if (mm_flags & MM_MMX) { - asm volatile( - "pxor %%mm7, %%mm7 \n\t" - "movl $-128*6, %%eax \n\t" - "1: \n\t" - "movq %%mm7, (%0, %%eax) \n\t" - "movq %%mm7, 8(%0, %%eax) \n\t" - "movq %%mm7, 16(%0, %%eax) \n\t" - "movq %%mm7, 24(%0, %%eax) \n\t" - "addl $32, %%eax \n\t" - " js 1b \n\t" - : : "r" (((int)s->block)+128*6) - : "%eax" - ); - }else{ - memset(s->block, 0, sizeof(s->block)); - } -#else - memset(s->block, 0, sizeof(s->block)); -#endif + clear_blocks(s->block[0]); + s->mv_dir = MV_DIR_FORWARD; s->mv_type = MV_TYPE_16X16; if (s->h263_msmpeg4) { diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 09a7174126..bf729d9638 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -1025,6 +1025,23 @@ static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line } while(--h); } 
+static void clear_blocks_mmx(DCTELEM *blocks) +{ + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "movl $-128*6, %%eax \n\t" + "1: \n\t" + "movq %%mm7, (%0, %%eax) \n\t" + "movq %%mm7, 8(%0, %%eax) \n\t" + "movq %%mm7, 16(%0, %%eax) \n\t" + "movq %%mm7, 24(%0, %%eax) \n\t" + "addl $32, %%eax \n\t" + " js 1b \n\t" + : : "r" (((int)blocks)+128*6) + : "%eax" + ); +} + static void just_return() { return; } void dsputil_init_mmx(void) @@ -1049,7 +1066,8 @@ void dsputil_init_mmx(void) get_pixels = get_pixels_mmx; put_pixels_clamped = put_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx; - + clear_blocks= clear_blocks_mmx; + pix_abs16x16 = pix_abs16x16_mmx; pix_abs16x16_x2 = pix_abs16x16_x2_mmx; pix_abs16x16_y2 = pix_abs16x16_y2_mmx; diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index 4d61df53c2..24fc5db85b 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -1402,7 +1402,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, } for(;;) { - memset(s->block, 0, sizeof(s->block)); + clear_blocks(s->block[0]); ret = mpeg_decode_mb(s, s->block); dprintf("ret=%d\n", ret); if (ret < 0)