From 5657d14094e0b9d3f277322e49442592973bbdac Mon Sep 17 00:00:00 2001 From: Jason Garrett-Glaser Date: Fri, 14 Jan 2011 21:36:16 +0000 Subject: [PATCH] H.264: switch to x264-style tracking of luma/chroma DC NNZ Useful so that we don't have to run the hierarchical DC iDCT if there aren't any coefficients. Opens up some future opportunities for optimization as well. Originally committed as revision 26337 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/h264.c | 8 ++++++-- libavcodec/h264.h | 19 ++++++++++++++++--- libavcodec/h264_cabac.c | 11 ++++++----- libavcodec/h264_cavlc.c | 10 +++++----- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/libavcodec/h264.c b/libavcodec/h264.c index f3470474ea..a819ee80a2 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -1203,6 +1203,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } }else{ h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); + if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){ if(is_h264){ if(!transform_bypass) h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]); @@ -1214,6 +1215,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } }else ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale); + } } if(h->deblocking_filter) xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); @@ -1281,8 +1283,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } } }else{ - chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); - chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) + chroma_dc_dequant_idct_c(h->mb + 16*16 , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 
1:4][h->chroma_qp[0]][0]); + if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) + chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); if(is_h264){ h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 189864bb38..17fd680369 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -39,8 +39,8 @@ #define interlaced_dct interlaced_dct_is_a_bad_name #define mb_intra mb_intra_is_not_initialized_see_mb_type -#define LUMA_DC_BLOCK_INDEX 25 -#define CHROMA_DC_BLOCK_INDEX 26 +#define LUMA_DC_BLOCK_INDEX 24 +#define CHROMA_DC_BLOCK_INDEX 25 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 #define COEFF_TOKEN_VLC_BITS 8 @@ -722,8 +722,20 @@ o-o o-o / / / o-o o-o */ + +/* Scan8 organization: + * 0 1 2 3 4 5 6 7 + * 0 u u y y y y y + * 1 u U U y Y Y Y Y + * 2 u U U y Y Y Y Y + * 3 v v y Y Y Y Y + * 4 v V V y Y Y Y Y + * 5 v V V DYDUDV + * DY/DU/DV are for luma/chroma DC. 
+ */ + //This table must be here because scan8[constant] must be known at compiletime -static const uint8_t scan8[16 + 2*4]={ +static const uint8_t scan8[16 + 2*4 + 3]={ 4+1*8, 5+1*8, 4+2*8, 5+2*8, 6+1*8, 7+1*8, 6+2*8, 7+2*8, 4+3*8, 5+3*8, 4+4*8, 5+4*8, @@ -732,6 +744,7 @@ static const uint8_t scan8[16 + 2*4]={ 1+2*8, 2+2*8, 1+4*8, 2+4*8, 1+5*8, 2+5*8, + 4+5*8, 5+5*8, 6+5*8 }; static av_always_inline uint32_t pack16to32(int a, int b){ diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 971af37114..3744095473 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -965,6 +965,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, nza = h->left_cbp&0x100; nzb = h-> top_cbp&0x100; } else { + idx -= CHROMA_DC_BLOCK_INDEX; nza = (h->left_cbp>>(6+idx))&0x01; nzb = (h-> top_cbp>>(6+idx))&0x01; } @@ -1060,8 +1061,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT /* read coded block flag */ if( is_dc || cat != 5 ) { if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) { - if( !is_dc ) - h->non_zero_count_cache[scan8[n]] = 0; + h->non_zero_count_cache[scan8[n]] = 0; #ifdef CABAC_ON_STACK h->cabac.range = cc.range ; @@ -1112,7 +1112,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT if( cat == 0 ) h->cbp_table[h->mb_xy] |= 0x100; else - h->cbp_table[h->mb_xy] |= 0x40 << n; + h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX); + h->non_zero_count_cache[scan8[n]] = coeff_count; } else { if( cat == 5 ) fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); @@ -1642,7 +1643,7 @@ decode_intra_mb: //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); AV_ZERO128(h->mb_luma_dc+0); AV_ZERO128(h->mb_luma_dc+8); - decode_cabac_residual_dc( h, h->mb_luma_dc, 0, 0, scan, 16); + decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16); if( cbp&15 ) { qmul = 
h->dequant4_coeff[0][s->qscale]; @@ -1681,7 +1682,7 @@ decode_intra_mb: int c; for( c = 0; c < 2; c++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); - decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, 4); + decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4); } } diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 426a285570..b8bc45058c 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -371,7 +371,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in //FIXME put trailing_onex into the context - if(n == CHROMA_DC_BLOCK_INDEX){ + if(n >= CHROMA_DC_BLOCK_INDEX){ coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); total_coeff= coeff_token>>2; }else{ @@ -383,9 +383,9 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in total_coeff= pred_non_zero_count(h, n); coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); total_coeff= coeff_token>>2; - h->non_zero_count_cache[ scan8[n] ]= total_coeff; } } + h->non_zero_count_cache[ scan8[n] ]= total_coeff; //FIXME set last_non_zero? 
@@ -482,14 +482,14 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in if(total_coeff == max_coeff) zeros_left=0; else{ - if(n == CHROMA_DC_BLOCK_INDEX) + if(n >= CHROMA_DC_BLOCK_INDEX) zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); else zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); } scantable += zeros_left + total_coeff - 1; - if(n > 24){ + if(n >= LUMA_DC_BLOCK_INDEX){ block[*scantable] = level[0]; for(i=1;i<total_coeff && zeros_left > 0;i++) { if(zeros_left < 7) @@ -988,7 +988,7 @@ decode_intra_mb: if(cbp&0x30){ for(chroma_idx=0; chroma_idx<2; chroma_idx++) - if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ + if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ return -1; } }