From 52f2adc015ba4a05c3da730e87b63b343747424d Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Mon, 2 Feb 2015 15:51:45 +0100 Subject: [PATCH] avcodec/hevc: Update the USE_SAO_SMALL_BUFFER case for the alignment requirements in FFmpeg Use edge emu buffers And enable the code unconditionally Speed difference without USE_SAO_SMALL_BUFFER and with the new code: Decicycles: 26772->26220 (BO32), 83803->80942 (BO64) Signed-off-by: Michael Niedermayer --- libavcodec/hevc.c | 60 +++++----------------------------------- libavcodec/hevc.h | 11 +------- libavcodec/hevc_filter.c | 54 +++++------------------------------- 3 files changed, 15 insertions(+), 110 deletions(-) diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index b77dbcde4c..0624cb0c72 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -280,24 +280,6 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) return 0; } -static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps) -{ - int ret, i; - - frame->width = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE); - frame->height = s->avctx->coded_height + 3; - if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) - return ret; - for (i = 0; frame->data[i]; i++) { - int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE; - frame->data[i] += offset; - } - frame->width = s->avctx->coded_width; - frame->height = s->avctx->coded_height; - - return 0; -} - static int set_sps(HEVCContext *s, const HEVCSPS *sps) { #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL) @@ -353,34 +335,19 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps) ff_videodsp_init (&s->vdsp, sps->bit_depth); if (sps->sao_enabled && !s->avctx->hwaccel) { -#ifdef USE_SAO_SMALL_BUFFER - { - int ctb_size = 1 << sps->log2_ctb_size; - int c_count = (sps->chroma_format_idc != 0) ? 3 : 1; - int c_idx, i; + int c_count = (sps->chroma_format_idc != 0) ? 
3 : 1; + int c_idx; - for (i = 0; i < s->threads_number ; i++) { - HEVCLocalContext *lc = s->HEVClcList[i]; - lc->sao_pixel_buffer = - av_malloc(((ctb_size + 2) * (ctb_size + 2)) << - sps->pixel_shift); - } - for(c_idx = 0; c_idx < c_count; c_idx++) { - int w = sps->width >> sps->hshift[c_idx]; - int h = sps->height >> sps->vshift[c_idx]; - s->sao_pixel_buffer_h[c_idx] = + for(c_idx = 0; c_idx < c_count; c_idx++) { + int w = sps->width >> sps->hshift[c_idx]; + int h = sps->height >> sps->vshift[c_idx]; + s->sao_pixel_buffer_h[c_idx] = av_malloc((w * 2 * sps->ctb_height) << sps->pixel_shift); - s->sao_pixel_buffer_v[c_idx] = + s->sao_pixel_buffer_v[c_idx] = av_malloc((h * 2 * sps->ctb_width) << sps->pixel_shift); - } } -#else - av_frame_unref(s->tmp_frame); - ret = get_buffer_sao(s, s->tmp_frame, sps); - s->sao_frame = s->tmp_frame; -#endif } s->sps = sps; @@ -3211,17 +3178,10 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->cabac_state); -#ifdef USE_SAO_SMALL_BUFFER - for (i = 0; i < s->threads_number; i++) { - av_freep(&s->HEVClcList[i]->sao_pixel_buffer); - } for (i = 0; i < 3; i++) { av_freep(&s->sao_pixel_buffer_h[i]); av_freep(&s->sao_pixel_buffer_v[i]); } -#else - av_frame_free(&s->tmp_frame); -#endif av_frame_free(&s->output_frame); for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { @@ -3281,12 +3241,6 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) if (!s->cabac_state) goto fail; -#ifndef USE_SAO_SMALL_BUFFER - s->tmp_frame = av_frame_alloc(); - if (!s->tmp_frame) - goto fail; -#endif - s->output_frame = av_frame_alloc(); if (!s->output_frame) goto fail; diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index 482341af76..ae9a32acf0 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -36,8 +36,6 @@ #include "thread.h" #include "videodsp.h" -//#define USE_SAO_SMALL_BUFFER /* reduce the memory used by SAO */ - #define MAX_DPB_SIZE 16 // A.4.1 #define MAX_REFS 16 @@ -747,9 +745,6 @@ typedef struct HEVCNAL { } HEVCNAL; 
typedef struct HEVCLocalContext { -#ifdef USE_SAO_SMALL_BUFFER - uint8_t *sao_pixel_buffer; -#endif uint8_t cabac_state[HEVC_CONTEXTS]; uint8_t stat_coeff[4]; @@ -774,6 +769,7 @@ typedef struct HEVCLocalContext { int end_of_tiles_y; /* +7 is for subpixel interpolation, *2 for high bit depths */ DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2]; + /* The extended size of edge_emu_buffer is (ab)used by SAO as its scratch buffer */ DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2]; DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); @@ -813,13 +809,8 @@ typedef struct HEVCContext { AVFrame *frame; AVFrame *output_frame; -#ifdef USE_SAO_SMALL_BUFFER uint8_t *sao_pixel_buffer_h[3]; uint8_t *sao_pixel_buffer_v[3]; -#else - AVFrame *tmp_frame; - AVFrame *sao_frame; -#endif const HEVCVPS *vps; const HEVCSPS *sps; diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index b8aa71e25a..e3bafa645d 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -161,14 +161,12 @@ int i, j; } } -#if defined(USE_SAO_SMALL_BUFFER) static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift) { if (pixel_shift) *(uint16_t *)dst = *(uint16_t *)src; else *dst = *src; - } static void copy_vert(uint8_t *dst, const uint8_t *src, @@ -210,7 +208,6 @@ static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src, copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src); } -#endif static void restore_tqb_pixels(HEVCContext *s, uint8_t *src1, const uint8_t *dst1, @@ -317,21 +314,16 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0); int tab = band_tab[(FFALIGN(width, 8) >> 3) - 1]; uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)]; -#if defined(USE_SAO_SMALL_BUFFER) - int
stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift; - uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift); -#else - int stride_dst = s->sao_frame->linesize[c_idx]; - uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)]; -#endif + int stride_dst; + uint8_t *dst; switch (sao->type_idx[c_idx]) { case SAO_BAND: + dst = lc->edge_emu_buffer; + stride_dst = 2*MAX_PB_SIZE; copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src); -#if defined(USE_SAO_SMALL_BUFFER) copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx, x_ctb, y_ctb); -#endif s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst, sao->offset_val[c_idx], sao->band_position[c_idx], width, height); @@ -341,7 +333,6 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) break; case SAO_EDGE: { -#if defined(USE_SAO_SMALL_BUFFER) int w = s->sps->width >> s->sps->hshift[c_idx]; int h = s->sps->height >> s->sps->vshift[c_idx]; int left_edge = edges[0]; @@ -351,6 +342,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) int sh = s->sps->pixel_shift; int left_pixels, right_pixels; + stride_dst = 2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE; + dst = lc->edge_emu_buffer + stride_dst + FF_INPUT_BUFFER_PADDING_SIZE; + if (!top_edge) { int left = 1 - left_edge; int right = 1 - right_edge; @@ -433,40 +427,6 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx, x_ctb, y_ctb); -#else - uint8_t left_pixels; - /* get the CTB edge pixels from the SAO pixel buffer */ - left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED); - if (!edges[1]) { - uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED); - uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED); - if (CTB(s->sao, x_ctb , 
y_ctb-1).type_idx[c_idx] == 0) - memcpy( dst - stride_dst - (top_left << s->sps->pixel_shift), - src - stride_src - (top_left << s->sps->pixel_shift), - (top_left + width + top_right) << s->sps->pixel_shift); - else { - if (top_left) - memcpy( dst - stride_dst - (1 << s->sps->pixel_shift), - src - stride_src - (1 << s->sps->pixel_shift), - 1 << s->sps->pixel_shift); - if(top_right) - memcpy( dst - stride_dst + (width << s->sps->pixel_shift), - src - stride_src + (width << s->sps->pixel_shift), - 1 << s->sps->pixel_shift); - } - } - if (!edges[3]) { // bottom and bottom right - uint8_t bottom_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] != SAO_APPLIED); - memcpy( dst + height * stride_dst - (bottom_left << s->sps->pixel_shift), - src + height * stride_src - (bottom_left << s->sps->pixel_shift), - (width + 1 + bottom_left) << s->sps->pixel_shift); - } - copy_CTB(dst - (left_pixels << s->sps->pixel_shift), - src - (left_pixels << s->sps->pixel_shift), - (width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src); -#endif - /* XXX: could handle the restoration here to simplify the - DSP functions */ s->hevcdsp.sao_edge_filter[restore](src, dst, stride_src, stride_dst, sao,