Merge commit '7d8154edd594981e7891d57de91f2260f8a62939'

* commit '7d8154edd594981e7891d57de91f2260f8a62939':
  h264: move intra4x4_pred_mode[_cache] into the per-slice context

Conflicts:
	libavcodec/h264.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2015-03-21 13:44:38 +01:00
commit 6abd1e901c
8 changed files with 71 additions and 64 deletions

View File

@ -142,7 +142,7 @@ void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
* Check if the top & left blocks are available if needed and
* change the dc mode so it only uses the available blocks.
*/
int ff_h264_check_intra4x4_pred_mode(H264Context *h)
int ff_h264_check_intra4x4_pred_mode(H264Context *h, H264SliceContext *sl)
{
static const int8_t top[12] = {
-1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
@ -154,14 +154,14 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h)
if (!(h->top_samples_available & 0x8000)) {
for (i = 0; i < 4; i++) {
int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
int status = top[sl->intra4x4_pred_mode_cache[scan8[0] + i]];
if (status < 0) {
av_log(h->avctx, AV_LOG_ERROR,
"top block unavailable for requested intra4x4 mode %d at %d %d\n",
status, h->mb_x, h->mb_y);
return AVERROR_INVALIDDATA;
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
sl->intra4x4_pred_mode_cache[scan8[0] + i] = status;
}
}
}
@ -170,14 +170,14 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h)
static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
for (i = 0; i < 4; i++)
if (!(h->left_samples_available & mask[i])) {
int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
int status = left[sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
if (status < 0) {
av_log(h->avctx, AV_LOG_ERROR,
"left block unavailable for requested intra4x4 mode %d at %d %d\n",
status, h->mb_x, h->mb_y);
return AVERROR_INVALIDDATA;
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
}
}
}
@ -432,6 +432,8 @@ int ff_h264_alloc_tables(H264Context *h)
FF_ALLOCZ_ARRAY_OR_GOTO(h->avctx, h->intra4x4_pred_mode,
row_mb_num, 8 * sizeof(uint8_t), fail)
h->slice_ctx[0].intra4x4_pred_mode = h->intra4x4_pred_mode;
FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count,
big_mb_num * 48 * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base,

View File

@ -358,6 +358,9 @@ typedef struct H264SliceContext {
int chroma_pred_mode;
int intra16x16_pred_mode;
int8_t intra4x4_pred_mode_cache[5 * 8];
int8_t(*intra4x4_pred_mode);
int topleft_mb_xy;
int top_mb_xy;
int topright_mb_xy;
@ -408,8 +411,6 @@ typedef struct H264Context {
int flags;
int workaround_bugs;
// prediction stuff
int8_t intra4x4_pred_mode_cache[5 * 8];
int8_t(*intra4x4_pred_mode);
H264PredContext hpc;
unsigned int topleft_samples_available;
@ -847,7 +848,7 @@ int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice);
* Check if the top & left blocks are available if needed & change the
* dc mode so it only uses the available blocks.
*/
int ff_h264_check_intra4x4_pred_mode(H264Context *h);
int ff_h264_check_intra4x4_pred_mode(H264Context *h, H264SliceContext *sl);
/**
* Check if the top & left blocks are available if needed & change the
@ -986,11 +987,12 @@ static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale)
/**
* Get the predicted intra4x4 prediction mode.
*/
static av_always_inline int pred_intra_mode(H264Context *h, int n)
static av_always_inline int pred_intra_mode(H264Context *h,
H264SliceContext *sl, int n)
{
const int index8 = scan8[n];
const int left = h->intra4x4_pred_mode_cache[index8 - 1];
const int top = h->intra4x4_pred_mode_cache[index8 - 8];
const int left = sl->intra4x4_pred_mode_cache[index8 - 1];
const int top = sl->intra4x4_pred_mode_cache[index8 - 8];
const int min = FFMIN(left, top);
tprintf(h->avctx, "mode:%d %d min:%d\n", left, top, min);
@ -1001,10 +1003,11 @@ static av_always_inline int pred_intra_mode(H264Context *h, int n)
return min;
}
static av_always_inline void write_back_intra_pred_mode(H264Context *h)
static av_always_inline void write_back_intra_pred_mode(H264Context *h,
H264SliceContext *sl)
{
int8_t *i4x4 = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
int8_t *i4x4_cache = h->intra4x4_pred_mode_cache;
int8_t *i4x4 = sl->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
int8_t *i4x4_cache = sl->intra4x4_pred_mode_cache;
AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
i4x4[4] = i4x4_cache[7 + 8 * 3];

View File

@ -2048,21 +2048,21 @@ decode_intra_mb:
if( dct8x8_allowed && get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ) ) {
mb_type |= MB_TYPE_8x8DCT;
for( i = 0; i < 16; i+=4 ) {
int pred = pred_intra_mode( h, i );
int pred = pred_intra_mode(h, sl, i);
int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
fill_rectangle(&sl->intra4x4_pred_mode_cache[scan8[i]], 2, 2, 8, mode, 1);
}
} else {
for( i = 0; i < 16; i++ ) {
int pred = pred_intra_mode( h, i );
h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
int pred = pred_intra_mode(h, sl, i);
sl->intra4x4_pred_mode_cache[scan8[i]] = decode_cabac_mb_intra4x4_pred_mode(h, pred);
av_dlog(h->avctx, "i4x4 pred=%d mode=%d\n", pred,
h->intra4x4_pred_mode_cache[scan8[i]]);
}
}
write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
write_back_intra_pred_mode(h, sl);
if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0 ) return -1;
} else {
sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl->intra16x16_pred_mode, 0);
if (sl->intra16x16_pred_mode < 0) return -1;

View File

@ -809,7 +809,7 @@ decode_intra_mb:
// fill_intra4x4_pred_table(h);
for(i=0; i<16; i+=di){
int mode= pred_intra_mode(h, i);
int mode = pred_intra_mode(h, sl, i);
if(!get_bits1(&h->gb)){
const int rem_mode= get_bits(&h->gb, 3);
@ -817,12 +817,12 @@ decode_intra_mb:
}
if(di==4)
fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
else
h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
}
write_back_intra_pred_mode(h);
if( ff_h264_check_intra4x4_pred_mode(h) < 0)
write_back_intra_pred_mode(h, sl);
if (ff_h264_check_intra4x4_pred_mode(h, sl) < 0)
return -1;
}else{
sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl->intra16x16_pred_mode, 0);

View File

@ -631,7 +631,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
}
for (i = 0; i < 16; i += 4) {
uint8_t *const ptr = dest_y + block_offset[i];
const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
if (h->x264_build != -1) {
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
@ -661,7 +661,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
}
for (i = 0; i < 16; i++) {
uint8_t *const ptr = dest_y + block_offset[i];
const int dir = h->intra4x4_pred_mode_cache[scan8[i]];
const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);

View File

@ -509,21 +509,21 @@ static void fill_decode_caches(H264Context *h, H264SliceContext *sl, int mb_type
if (IS_INTRA4x4(mb_type)) {
if (IS_INTRA4x4(top_type)) {
AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
AV_COPY32(sl->intra4x4_pred_mode_cache + 4 + 8 * 0, sl->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
} else {
h->intra4x4_pred_mode_cache[4 + 8 * 0] =
h->intra4x4_pred_mode_cache[5 + 8 * 0] =
h->intra4x4_pred_mode_cache[6 + 8 * 0] =
h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
sl->intra4x4_pred_mode_cache[4 + 8 * 0] =
sl->intra4x4_pred_mode_cache[5 + 8 * 0] =
sl->intra4x4_pred_mode_cache[6 + 8 * 0] =
sl->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
}
for (i = 0; i < 2; i++) {
if (IS_INTRA4x4(left_type[LEFT(i)])) {
int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
int8_t *mode = sl->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
} else {
h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
sl->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
sl->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
}
}
}

View File

@ -384,9 +384,11 @@ void ff_h264_init_dequant_tables(H264Context *h)
/**
* Mimic alloc_tables(), but for every context thread.
*/
static void clone_tables(H264Context *dst, H264Context *src, int i)
static void clone_tables(H264Context *dst, H264SliceContext *sl,
H264Context *src, int i)
{
dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
sl->intra4x4_pred_mode = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
dst->non_zero_count = src->non_zero_count;
dst->slice_table = src->slice_table;
dst->cbp_table = src->cbp_table;
@ -1240,7 +1242,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
h->slice_ctx[i].h264 = c;
init_scan_tables(c);
clone_tables(c, h, i);
clone_tables(c, &h->slice_ctx[i], h, i);
c->context_initialized = 1;
}

View File

@ -540,7 +540,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
*/
for (m = 0; m < 2; m++) {
if (h->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
if (h->mb_x > 0 && sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
for (i = 0; i < 4; i++)
AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i * 8],
h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
@ -553,21 +553,21 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
h->cur_pic.motion_val[m][b_xy - h->b_stride],
4 * 2 * sizeof(int16_t));
memset(&h->ref_cache[m][scan8[0] - 1 * 8],
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
if (h->mb_x < h->mb_width - 1) {
AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1 * 8],
h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
h->ref_cache[m][scan8[0] + 4 - 1 * 8] =
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
} else
h->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
if (h->mb_x > 0) {
AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1 * 8],
h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
h->ref_cache[m][scan8[0] - 1 - 1 * 8] =
(h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
(sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
} else
h->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
} else
@ -603,22 +603,22 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
mb_type = MB_TYPE_16x16;
} else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
memset(h->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
memset(sl->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
if (mb_type == 8) {
if (h->mb_x > 0) {
for (i = 0; i < 4; i++)
h->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
sl->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
if (sl->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
h->left_samples_available = 0x5F5F;
}
if (h->mb_y > 0) {
h->intra4x4_pred_mode_cache[4 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
h->intra4x4_pred_mode_cache[5 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
h->intra4x4_pred_mode_cache[6 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
h->intra4x4_pred_mode_cache[7 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
sl->intra4x4_pred_mode_cache[4 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
sl->intra4x4_pred_mode_cache[5 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
sl->intra4x4_pred_mode_cache[6 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
sl->intra4x4_pred_mode_cache[7 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
if (h->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
if (sl->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
h->top_samples_available = 0x33FF;
}
@ -632,8 +632,8 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
return -1;
}
left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
left = &sl->intra4x4_pred_mode_cache[scan8[i] - 1];
top = &sl->intra4x4_pred_mode_cache[scan8[i] - 8];
left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
@ -645,19 +645,19 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
} else { /* mb_type == 33, DC_128_PRED block type */
for (i = 0; i < 4; i++)
memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
}
write_back_intra_pred_mode(h);
write_back_intra_pred_mode(h, sl);
if (mb_type == 8) {
ff_h264_check_intra4x4_pred_mode(h);
ff_h264_check_intra4x4_pred_mode(h, sl);
h->top_samples_available = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
h->left_samples_available = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
} else {
for (i = 0; i < 4; i++)
memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
h->top_samples_available = 0x33FF;
h->left_samples_available = 0x5F5F;
@ -688,7 +688,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
}
if (!IS_INTRA4x4(mb_type)) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
}
if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
memset(h->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
@ -853,17 +853,17 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
/* reset intra predictors and invalidate motion vector references */
if (h->mb_x > 0) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
-1, 4 * sizeof(int8_t));
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
-1, 8 * sizeof(int8_t) * h->mb_x);
}
if (h->mb_y > 0) {
memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
-1, 8 * sizeof(int8_t) * (h->mb_width - h->mb_x));
if (h->mb_x > 0)
h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
}
return 0;