mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-12-18 05:24:42 +00:00
H.264: switch to x264-style tracking of luma/chroma DC NNZ
Useful so that we don't have to run the hierarchical DC iDCT if there aren't any coefficients. Opens up some future opportunities for optimization as well. Originally committed as revision 26337 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
19fb234e4a
commit
5657d14094
@ -1203,6 +1203,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
||||
}
|
||||
}else{
|
||||
h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
|
||||
if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
|
||||
if(is_h264){
|
||||
if(!transform_bypass)
|
||||
h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
|
||||
@ -1214,6 +1215,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
||||
}
|
||||
}else
|
||||
ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
|
||||
}
|
||||
}
|
||||
if(h->deblocking_filter)
|
||||
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
|
||||
@ -1281,8 +1283,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
|
||||
}
|
||||
}
|
||||
}else{
|
||||
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
||||
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
|
||||
chroma_dc_dequant_idct_c(h->mb + 16*16 , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
|
||||
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
||||
if(is_h264){
|
||||
h->h264dsp.h264_idct_add8(dest, block_offset,
|
||||
h->mb, uvlinesize,
|
||||
|
@ -39,8 +39,8 @@
|
||||
#define interlaced_dct interlaced_dct_is_a_bad_name
|
||||
#define mb_intra mb_intra_is_not_initialized_see_mb_type
|
||||
|
||||
#define LUMA_DC_BLOCK_INDEX 25
|
||||
#define CHROMA_DC_BLOCK_INDEX 26
|
||||
#define LUMA_DC_BLOCK_INDEX 24
|
||||
#define CHROMA_DC_BLOCK_INDEX 25
|
||||
|
||||
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
|
||||
#define COEFF_TOKEN_VLC_BITS 8
|
||||
@ -722,8 +722,20 @@ o-o o-o
|
||||
/ / /
|
||||
o-o o-o
|
||||
*/
|
||||
|
||||
/* Scan8 organization:
|
||||
*   0 1 2 3 4 5 6 7
|
||||
* 0   u u y y y y y
|
||||
* 1 u U U y Y Y Y Y
|
||||
* 2 u U U y Y Y Y Y
|
||||
* 3   v v y Y Y Y Y
|
||||
* 4 v V V y Y Y Y Y
|
||||
* 5 v V V   DYDUDV
|
||||
* DY/DU/DV are for luma/chroma DC.
|
||||
*/
|
||||
|
||||
//This table must be here because scan8[constant] must be known at compile time
|
||||
static const uint8_t scan8[16 + 2*4]={
|
||||
static const uint8_t scan8[16 + 2*4 + 3]={
|
||||
4+1*8, 5+1*8, 4+2*8, 5+2*8,
|
||||
6+1*8, 7+1*8, 6+2*8, 7+2*8,
|
||||
4+3*8, 5+3*8, 4+4*8, 5+4*8,
|
||||
@ -732,6 +744,7 @@ static const uint8_t scan8[16 + 2*4]={
|
||||
1+2*8, 2+2*8,
|
||||
1+4*8, 2+4*8,
|
||||
1+5*8, 2+5*8,
|
||||
4+5*8, 5+5*8, 6+5*8
|
||||
};
|
||||
|
||||
static av_always_inline uint32_t pack16to32(int a, int b){
|
||||
|
@ -965,6 +965,7 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
|
||||
nza = h->left_cbp&0x100;
|
||||
nzb = h-> top_cbp&0x100;
|
||||
} else {
|
||||
idx -= CHROMA_DC_BLOCK_INDEX;
|
||||
nza = (h->left_cbp>>(6+idx))&0x01;
|
||||
nzb = (h-> top_cbp>>(6+idx))&0x01;
|
||||
}
|
||||
@ -1060,8 +1061,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
|
||||
/* read coded block flag */
|
||||
if( is_dc || cat != 5 ) {
|
||||
if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
|
||||
if( !is_dc )
|
||||
h->non_zero_count_cache[scan8[n]] = 0;
|
||||
h->non_zero_count_cache[scan8[n]] = 0;
|
||||
|
||||
#ifdef CABAC_ON_STACK
|
||||
h->cabac.range = cc.range ;
|
||||
@ -1112,7 +1112,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
|
||||
if( cat == 0 )
|
||||
h->cbp_table[h->mb_xy] |= 0x100;
|
||||
else
|
||||
h->cbp_table[h->mb_xy] |= 0x40 << n;
|
||||
h->cbp_table[h->mb_xy] |= 0x40 << (n - CHROMA_DC_BLOCK_INDEX);
|
||||
h->non_zero_count_cache[scan8[n]] = coeff_count;
|
||||
} else {
|
||||
if( cat == 5 )
|
||||
fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
|
||||
@ -1642,7 +1643,7 @@ decode_intra_mb:
|
||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
|
||||
AV_ZERO128(h->mb_luma_dc+0);
|
||||
AV_ZERO128(h->mb_luma_dc+8);
|
||||
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, 0, scan, 16);
|
||||
decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
|
||||
|
||||
if( cbp&15 ) {
|
||||
qmul = h->dequant4_coeff[0][s->qscale];
|
||||
@ -1681,7 +1682,7 @@ decode_intra_mb:
|
||||
int c;
|
||||
for( c = 0; c < 2; c++ ) {
|
||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
|
||||
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, 4);
|
||||
decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -371,7 +371,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
||||
|
||||
//FIXME put trailing_onex into the context
|
||||
|
||||
if(n == CHROMA_DC_BLOCK_INDEX){
|
||||
if(n >= CHROMA_DC_BLOCK_INDEX){
|
||||
coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
|
||||
total_coeff= coeff_token>>2;
|
||||
}else{
|
||||
@ -383,9 +383,9 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
||||
total_coeff= pred_non_zero_count(h, n);
|
||||
coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
|
||||
total_coeff= coeff_token>>2;
|
||||
h->non_zero_count_cache[ scan8[n] ]= total_coeff;
|
||||
}
|
||||
}
|
||||
h->non_zero_count_cache[ scan8[n] ]= total_coeff;
|
||||
|
||||
//FIXME set last_non_zero?
|
||||
|
||||
@ -482,14 +482,14 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
|
||||
if(total_coeff == max_coeff)
|
||||
zeros_left=0;
|
||||
else{
|
||||
if(n == CHROMA_DC_BLOCK_INDEX)
|
||||
if(n >= CHROMA_DC_BLOCK_INDEX)
|
||||
zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
|
||||
else
|
||||
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
|
||||
}
|
||||
|
||||
scantable += zeros_left + total_coeff - 1;
|
||||
if(n > 24){
|
||||
if(n >= LUMA_DC_BLOCK_INDEX){
|
||||
block[*scantable] = level[0];
|
||||
for(i=1;i<total_coeff && zeros_left > 0;i++) {
|
||||
if(zeros_left < 7)
|
||||
@ -988,7 +988,7 @@ decode_intra_mb:
|
||||
|
||||
if(cbp&0x30){
|
||||
for(chroma_idx=0; chroma_idx<2; chroma_idx++)
|
||||
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
|
||||
if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user