diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 6365afb303..48a2455b22 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -996,7 +996,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src } } -static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ +static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; int deblock_left; int deblock_top; @@ -1021,15 +1021,15 @@ static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_c deblock_top = (s->mb_y > !!MB_FIELD); } - src_y -= linesize + 1 + h->pixel_shift; - src_cb -= uvlinesize + 1 + h->pixel_shift; - src_cr -= uvlinesize + 1 + h->pixel_shift; + src_y -= linesize + 1 + pixel_shift; + src_cb -= uvlinesize + 1 + pixel_shift; + src_cr -= uvlinesize + 1 + pixel_shift; top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; top_border = h->top_borders[top_idx][s->mb_x]; #define XCHG(a,b,xchg)\ - if (h->pixel_shift) {\ + if (pixel_shift) {\ if (xchg) {\ AV_SWAP64(b+0,a+0);\ AV_SWAP64(b+8,a+8);\ @@ -1042,41 +1042,41 @@ else AV_COPY64(b,a); if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+(8<pixel_shift), src_y -(7<pixel_shift), 1); + XCHG(top_border_m1+(8<pixel_shift), 1); } - XCHG(top_border+(0<pixel_shift), src_y +(1<pixel_shift), xchg); - XCHG(top_border+(8<pixel_shift), src_y +(9<pixel_shift), 1); + XCHG(top_border+(0<mb_x+1 < s->mb_width){ - XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<pixel_shift), 1); + XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<flags&CODEC_FLAG_GRAY)){ if(deblock_top){ if(deblock_left){ - XCHG(top_border_m1+(16<pixel_shift), src_cb -(7<pixel_shift), 1); - XCHG(top_border_m1+(24<pixel_shift), src_cr -(7<pixel_shift), 1); + XCHG(top_border_m1+(16<pixel_shift), src_cb+1+h->pixel_shift, 1); - XCHG(top_border+(24<pixel_shift), src_cr+1+h->pixel_shift, 1); + XCHG(top_border+(16<pixel_shift) +static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index, int pixel_shift) { + if (!pixel_shift) return mb[index]; else return ((int32_t*)mb)[index]; } -static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) { - if (!h->pixel_shift) +static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value, int pixel_shift) { + if (!pixel_shift) mb[index] = value; else ((int32_t*)mb)[index] = value; } -static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ +static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){ MpegEncContext * const s = &h->s; const int mb_x= s->mb_x; const int mb_y= s->mb_y; @@ -1092,12 +1092,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); - dest_y = s->current_picture.data[0] + ((mb_x<pixel_shift) + mb_y * s->linesize ) * 16; - dest_cb = s->current_picture.data[1] + ((mb_x<pixel_shift) + mb_y * s->uvlinesize) * 8; - dest_cr = s->current_picture.data[2] + ((mb_x<pixel_shift) + mb_y * s->uvlinesize) * 8; + dest_y = s->current_picture.data[0] + ((mb_x<linesize ) * 16; + dest_cb = s->current_picture.data[1] + ((mb_x<uvlinesize) * 8; + dest_cr = s->current_picture.data[2] + ((mb_x<uvlinesize) * 8; - s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<pixel_shift), s->linesize, 4); - s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<pixel_shift), dest_cr - dest_cb, 2); + s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<linesize, 4); + s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<list_counts[mb_xy]= h->list_count; @@ -1134,7 +1134,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } if (!simple && IS_INTRA_PCM(mb_type)) { - if (h->pixel_shift) { + if (pixel_shift) { const int bit_depth = h->sps.bit_depth_luma; int j; GetBitContext gb; @@ -1167,7 +1167,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ } else { if(IS_INTRA(mb_type)){ if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift); if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); @@ -1188,16 +1188,16 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ uint8_t * const ptr= dest_y + block_offset[i]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<pixel_shift), linesize); + h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<non_zero_count_cache[ scan8[i] ]; h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<topright_samples_available<mb, i*16)) - idct_dc_add(ptr, h->mb + (i*16<pixel_shift), linesize); + if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift)) + idct_dc_add(ptr, h->mb + (i*16<mb + (i*16<pixel_shift), linesize); + idct_add (ptr, h->mb + (i*16<intra4x4_pred_mode_cache[ scan8[i] ]; if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ - h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<pixel_shift), linesize); + h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<topright_samples_available<pixel_shift) { + if (pixel_shift) { tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; topright= (uint8_t*) &tr_high; } else { @@ -1231,7 +1231,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ topright= (uint8_t*) &tr; } }else - topright= ptr + (4<pixel_shift) - linesize; + topright= ptr + (4<non_zero_count_cache[ scan8[i] ]; if(nnz){ if(is_h264){ - if(nnz == 1 && dctcoef_get(h, h->mb, i*16)) - idct_dc_add(ptr, h->mb + (i*16<pixel_shift), linesize); + if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift)) + idct_dc_add(ptr, h->mb + (i*16<mb + (i*16<pixel_shift), linesize); + idct_add (ptr, h->mb + (i*16<mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i)); + dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift); } } } @@ -1273,7 +1273,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ #endif } if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift); }else if(is_h264){ ff_hl_motion(h, dest_y, dest_cb, dest_cr, s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, @@ -1290,8 +1290,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); }else{ for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) - s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<pixel_shift), linesize); + if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift)) + s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<dsp.add_pixels8 : s->dsp.add_pixels4; for(i=0; i<16; i+=di){ if(h->non_zero_count_cache[ scan8[i] ]){ - idct_add(dest_y + block_offset[i], h->mb + (i*16<pixel_shift), linesize); + idct_add(dest_y + block_offset[i], h->mb + (i*16<sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<pixel_shift), uvlinesize); - h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<pixel_shift), uvlinesize); + h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<dsp.add_pixels4; for(i=16; i<16+8; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) - idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<pixel_shift), uvlinesize); + if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift)) + idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) - h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); + h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); h->h264dsp.h264_idct_add8(dest, block_offset, h->mb, uvlinesize, h->non_zero_count_cache); @@ -1372,15 +1372,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ /** * Process a macroblock; this case avoids checks for expensive uncommon cases. */ -static void hl_decode_mb_simple(H264Context *h){ - hl_decode_mb_internal(h, 1); +static void hl_decode_mb_simple8(H264Context *h){ + hl_decode_mb_internal(h, 1, 0); } /** * Process a macroblock; this handles edge cases, such as interlacing. */ static void av_noinline hl_decode_mb_complex(H264Context *h){ - hl_decode_mb_internal(h, 0); + hl_decode_mb_internal(h, 0, h->pixel_shift); } void ff_h264_hl_decode_mb(H264Context *h){ @@ -1389,9 +1389,11 @@ void ff_h264_hl_decode_mb(H264Context *h){ const int mb_type= s->current_picture.mb_type[mb_xy]; int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; - if (is_complex) + if (is_complex || h->pixel_shift) hl_decode_mb_complex(h); - else hl_decode_mb_simple(h); + else{ + hl_decode_mb_simple8(h); + } } static int pred_weight_table(H264Context *h){