h264: move intra4x4_pred_mode[_cache] into the per-slice context

This commit is contained in:
Anton Khirnov 2015-01-17 22:28:46 +01:00
parent 8b00f4df20
commit 7d8154edd5
8 changed files with 71 additions and 64 deletions

View File

@ -125,7 +125,7 @@ void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
* Check if the top & left blocks are available if needed and
* change the dc mode so it only uses the available blocks.
*/
int ff_h264_check_intra4x4_pred_mode(H264Context *h)
int ff_h264_check_intra4x4_pred_mode(H264Context *h, H264SliceContext *sl)
{
static const int8_t top[12] = {
-1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
@ -137,14 +137,14 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h)
if (!(h->top_samples_available & 0x8000)) {
for (i = 0; i < 4; i++) {
int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
int status = top[sl->intra4x4_pred_mode_cache[scan8[0] + i]];
if (status < 0) {
av_log(h->avctx, AV_LOG_ERROR,
"top block unavailable for requested intra4x4 mode %d at %d %d\n",
status, h->mb_x, h->mb_y);
return AVERROR_INVALIDDATA;
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
sl->intra4x4_pred_mode_cache[scan8[0] + i] = status;
}
}
}
@ -153,14 +153,14 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h)
static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
for (i = 0; i < 4; i++)
if (!(h->left_samples_available & mask[i])) {
int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
int status = left[sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
if (status < 0) {
av_log(h->avctx, AV_LOG_ERROR,
"left block unavailable for requested intra4x4 mode %d at %d %d\n",
status, h->mb_x, h->mb_y);
return AVERROR_INVALIDDATA;
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
}
}
}
@ -409,6 +409,8 @@ int ff_h264_alloc_tables(H264Context *h)
FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode,
row_mb_num * 8 * sizeof(uint8_t), fail)
h->slice_ctx[0].intra4x4_pred_mode = h->intra4x4_pred_mode;
FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count,
big_mb_num * 48 * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base,

View File

@ -321,6 +321,9 @@ typedef struct H264SliceContext {
int chroma_pred_mode;
int intra16x16_pred_mode;
int8_t intra4x4_pred_mode_cache[5 * 8];
int8_t(*intra4x4_pred_mode);
int topleft_mb_xy;
int top_mb_xy;
int topright_mb_xy;
@ -369,8 +372,6 @@ typedef struct H264Context {
int flags;
int workaround_bugs;
// prediction stuff
int8_t intra4x4_pred_mode_cache[5 * 8];
int8_t(*intra4x4_pred_mode);
H264PredContext hpc;
unsigned int topleft_samples_available;
@ -787,7 +788,7 @@ int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice);
* Check if the top & left blocks are available if needed & change the
* dc mode so it only uses the available blocks.
*/
int ff_h264_check_intra4x4_pred_mode(H264Context *h);
int ff_h264_check_intra4x4_pred_mode(H264Context *h, H264SliceContext *sl);
/**
* Check if the top & left blocks are available if needed & change the
@ -913,11 +914,12 @@ static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale)
/**
* Get the predicted intra4x4 prediction mode.
*/
static av_always_inline int pred_intra_mode(H264Context *h, int n)
static av_always_inline int pred_intra_mode(H264Context *h,
H264SliceContext *sl, int n)
{
const int index8 = scan8[n];
const int left = h->intra4x4_pred_mode_cache[index8 - 1];
const int top = h->intra4x4_pred_mode_cache[index8 - 8];
const int left = sl->intra4x4_pred_mode_cache[index8 - 1];
const int top = sl->intra4x4_pred_mode_cache[index8 - 8];
const int min = FFMIN(left, top);
tprintf(h->avctx, "mode:%d %d min:%d\n", left, top, min);
@ -928,10 +930,11 @@ static av_always_inline int pred_intra_mode(H264Context *h, int n)
return min;
}
static av_always_inline void write_back_intra_pred_mode(H264Context *h)
static av_always_inline void write_back_intra_pred_mode(H264Context *h,
H264SliceContext *sl)
{
int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
int8_t *i4x4_cache = h->intra4x4_pred_mode_cache;
int8_t *i4x4 = sl->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
int8_t *i4x4_cache = sl->intra4x4_pred_mode_cache;
AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
i4x4[4] = i4x4_cache[7 + 8 * 3];

View File

@ -2042,21 +2042,21 @@ decode_intra_mb:
if( dct8x8_allowed && get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ) ) {
mb_type |= MB_TYPE_8x8DCT;
for( i = 0; i < 16; i+=4 ) {
int pred = pred_intra_mode( h, i );
int pred = pred_intra_mode(h, sl, i);
int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
fill_rectangle(&sl->intra4x4_pred_mode_cache[scan8[i]], 2, 2, 8, mode, 1);
}
} else {
for( i = 0; i < 16; i++ ) {
int pred = pred_intra_mode( h, i );
h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
int pred = pred_intra_mode(h, sl, i);
sl->intra4x4_pred_mode_cache[scan8[i]] = decode_cabac_mb_intra4x4_pred_mode(h, pred);
/* Log the mode that was just stored in the per-slice cache; the cache
 * no longer lives in H264Context, so it must be read through sl. */
av_dlog(h->avctx, "i4x4 pred=%d mode=%d\n", pred,
sl->intra4x4_pred_mode_cache[scan8[i]]);
}
}
write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
write_back_intra_pred_mode(h, sl);
if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0 ) return -1;
} else {
sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl->intra16x16_pred_mode, 0);
if (sl->intra16x16_pred_mode < 0) return -1;

View File

@ -798,7 +798,7 @@ decode_intra_mb:
// fill_intra4x4_pred_table(h);
for(i=0; i<16; i+=di){
int mode= pred_intra_mode(h, i);
int mode = pred_intra_mode(h, sl, i);
if(!get_bits1(&h->gb)){
const int rem_mode= get_bits(&h->gb, 3);
@ -806,12 +806,12 @@ decode_intra_mb:
}
if(di==4)
fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
else
h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
}
write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0)
write_back_intra_pred_mode(h, sl);
if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
return -1;
}else{
sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl->intra16x16_pred_mode, 0);

View File

@ -631,7 +631,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
}
for (i = 0; i < 16; i += 4) {
uint8_t *const ptr = dest_y + block_offset[i];
const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
} else {
@ -656,7 +656,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
}
for (i = 0; i < 16; i++) {
uint8_t *const ptr = dest_y + block_offset[i];
const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);

View File

@ -509,21 +509,21 @@ static void fill_decode_caches(H264Context *h, H264SliceContext *sl, int mb_type
if (IS_INTRA4x4(mb_type)) {
if (IS_INTRA4x4(top_type)) {
AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
AV_COPY32(sl->intra4x4_pred_mode_cache + 4 + 8 * 0, sl->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
} else {
h->intra4x4_pred_mode_cache[4 + 8 * 0] =
h->intra4x4_pred_mode_cache[5 + 8 * 0] =
h->intra4x4_pred_mode_cache[6 + 8 * 0] =
h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
sl->intra4x4_pred_mode_cache[4 + 8 * 0] =
sl->intra4x4_pred_mode_cache[5 + 8 * 0] =
sl->intra4x4_pred_mode_cache[6 + 8 * 0] =
sl->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
}
for (i = 0; i < 2; i++) {
if (IS_INTRA4x4(left_type[LEFT(i)])) {
int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
int8_t *mode = sl->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
} else {
h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
}
}
}

View File

@ -363,9 +363,11 @@ void h264_init_dequant_tables(H264Context *h)
/**
* Mimic alloc_tables(), but for every context thread.
*/
static void clone_tables(H264Context *dst, H264Context *src, int i)
static void clone_tables(H264Context *dst, H264SliceContext *sl,
H264Context *src, int i)
{
dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
sl->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
dst->non_zero_count = src->non_zero_count;
dst->slice_table = src->slice_table;
dst->cbp_table = src->cbp_table;
@ -1144,7 +1146,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
h->slice_ctx[i].h264 = c;
init_scan_tables(c);
clone_tables(c, h, i);
clone_tables(c, &h->slice_ctx[i], h, i);
c->context_initialized = 1;
}

View File

@ -533,7 +533,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
*/
for (m = 0; m < 2; m++) {
if (h->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
if (h->mb_x > 0 && sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
for (i = 0; i < 4; i++)
AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i * 8],
h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
@ -546,21 +546,21 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
h->cur_pic.motion_val[m][b_xy - h->b_stride],
4 * 2 * sizeof(int16_t));
memset(&h->ref_cache[m][scan8[0] - 1 * 8],
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
if (h->mb_x < h->mb_width - 1) {
AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1 * 8],
h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
h->ref_cache[m][scan8[0] + 4 - 1 * 8] =
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
} else
h->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
if (h->mb_x > 0) {
AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1 * 8],
h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
h->ref_cache[m][scan8[0] - 1 - 1 * 8] =
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
} else
h->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
} else
@ -596,22 +596,22 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
mb_type = MB_TYPE_16x16;
} else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
memset(h->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
memset(sl->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
if (mb_type == 8) {
if (h->mb_x > 0) {
for (i = 0; i < 4; i++)
h->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
sl->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
if (sl->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
h->left_samples_available = 0x5F5F;
}
if (h->mb_y > 0) {
h->intra4x4_pred_mode_cache[4 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
h->intra4x4_pred_mode_cache[5 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
h->intra4x4_pred_mode_cache[6 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
h->intra4x4_pred_mode_cache[7 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
sl->intra4x4_pred_mode_cache[4 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
sl->intra4x4_pred_mode_cache[5 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
sl->intra4x4_pred_mode_cache[6 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
sl->intra4x4_pred_mode_cache[7 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
if (h->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
if (sl->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
h->top_samples_available = 0x33FF;
}
@ -625,8 +625,8 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
return -1;
}
left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
left = &sl->intra4x4_pred_mode_cache[scan8[i] - 1];
top = &sl->intra4x4_pred_mode_cache[scan8[i] - 8];
left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
@ -638,19 +638,19 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
} else { /* mb_type == 33, DC_128_PRED block type */
for (i = 0; i < 4; i++)
memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
}
write_back_intra_pred_mode(h);
write_back_intra_pred_mode(h, sl);
if (mb_type == 8) {
ff_h264_check_intra4x4_pred_mode(h);
ff_h264_check_intra4x4_pred_mode(h, sl);
h->top_samples_available = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
h->left_samples_available = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
} else {
for (i = 0; i < 4; i++)
memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
h->top_samples_available = 0x33FF;
h->left_samples_available = 0x5F5F;
@ -681,7 +681,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
}
if (!IS_INTRA4x4(mb_type)) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
}
if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
memset(h->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
@ -846,17 +846,17 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
/* reset intra predictors and invalidate motion vector references */
if (h->mb_x > 0) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
-1, 4 * sizeof(int8_t));
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
-1, 8 * sizeof(int8_t) * h->mb_x);
}
if (h->mb_y > 0) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
-1, 8 * sizeof(int8_t) * (h->mb_width - h->mb_x));
if (h->mb_x > 0)
h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
}
return 0;