2013-10-12 09:55:48 +00:00
|
|
|
/*
|
2013-12-20 23:08:50 +00:00
|
|
|
* HEVC video decoder
|
2013-10-12 09:55:48 +00:00
|
|
|
*
|
|
|
|
* Copyright (C) 2012 - 2013 Guillaume Martres
|
2014-04-28 14:12:28 +00:00
|
|
|
* Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
|
|
|
|
*
|
2013-10-12 09:55:48 +00:00
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef AVCODEC_HEVCDSP_H
|
|
|
|
#define AVCODEC_HEVCDSP_H
|
|
|
|
|
|
|
|
#include "get_bits.h"
|
|
|
|
|
2014-07-28 17:17:25 +00:00
|
|
|
/* Size constant used by the DSP code; the SAO edge filter's implicit source
 * stride is expressed in terms of it (2 * MAX_PB_SIZE + padding).
 * NOTE(review): presumably the maximum prediction-block dimension in
 * samples - confirm against the users of this macro. */
#define MAX_PB_SIZE 64
|
|
|
|
|
2013-12-20 23:08:50 +00:00
|
|
|
/**
 * SAO parameters; each field is named after the syntax element or derived
 * variable given in its trailing comment.
 *
 * NOTE(review): the leading dimension of 3 is presumably the colour
 * component index (compare the c_idx argument of sao_edge_restore) - confirm
 * against the code that fills this structure.
 *
 * Field order and types are part of the layout shared with the DSP
 * routines that receive a pointer to this struct; do not reorder.
 */
typedef struct SAOParams {
    int     offset_abs[3][4];   ///< sao_offset_abs
    int     offset_sign[3][4];  ///< sao_offset_sign

    uint8_t band_position[3];   ///< sao_band_position

    int     eo_class[3];        ///< sao_eo_class

    int16_t offset_val[3][5];   ///< SaoOffsetVal

    uint8_t type_idx[3];        ///< sao_type_idx
} SAOParams;
|
2013-10-12 09:55:48 +00:00
|
|
|
|
2013-12-20 23:08:50 +00:00
|
|
|
/**
 * Table of function pointers for the HEVC decoder's DSP routines.
 *
 * The ff_hevc_dsp_init*() functions declared in this header receive a
 * pointer to this structure together with a bit depth.
 * NOTE(review): presumably they populate every member for that bit depth -
 * confirm against the init implementations.
 *
 * Member order and signatures form the interface between the decoder and
 * the C / architecture-specific implementations; keep them in sync.
 */
typedef struct HEVCDSPContext {
    /* PCM samples: destination picture plus a bitstream reader.
     * NOTE(review): presumably reads width * height samples of
     * pcm_bit_depth bits from gb - confirm against the implementation. */
    void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
                    struct GetBitContext *gb, int pcm_bit_depth);

    /* Residual handling. The arrays of four are presumably indexed by
     * transform-size class - TODO confirm the index mapping. */
    void (*transform_add[4])(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride);

    void (*transform_skip)(int16_t *coeffs, int16_t log2_size);

    void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);

    void (*idct_4x4_luma)(int16_t *coeffs);

    void (*idct[4])(int16_t *coeffs, int col_limit);

    void (*idct_dc[4])(int16_t *coeffs);

    /* SAO band filter, explicit destination and source strides.
     * NOTE(review): the meaning of the [5] index is not visible in this
     * header - presumably a block-width class; confirm. */
    void (*sao_band_filter[5])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
                               int16_t *sao_offset_val, int sao_left_class, int width, int height);

    /* implicit stride_src parameter has value of 2 * MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE */
    void (*sao_edge_filter[5])(uint8_t *_dst /* align 16 */, uint8_t *_src /* align 32 */, ptrdiff_t stride_dst,
                               int16_t *sao_offset_val, int sao_eo_class, int width, int height);

    void (*sao_edge_restore[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
                                struct SAOParams *sao, int *borders, int _width, int _height, int c_idx,
                                uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge);

    /*
     * Interpolation functions (qpel and epel families).
     *
     * Signature pattern, as declared below:
     *  - plain:  int16_t destination without a stride argument;
     *  - _uni:   uint8_t destination with its own stride;
     *  - _bi:    like _uni plus a second int16_t source (src2);
     *  - _w:     additionally takes denom and weight/offset (wx/ox) values.
     *
     * NOTE(review): the [10][2][2] index meaning is not visible in this
     * header - presumably [width class][vertical fraction != 0]
     * [horizontal fraction != 0]; confirm against the callers.
     */
    void (*put_hevc_qpel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                                    int height, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                                        int height, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_qpel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);

    void (*put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                       int16_t *src2,
                                       int height, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                         int16_t *src2,
                                         int height, int denom, int wx0, int wx1,
                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_epel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                                    int height, intptr_t mx, intptr_t my, int width);

    void (*put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                        int height, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_epel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
    void (*put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                       int16_t *src2,
                                       int height, intptr_t mx, intptr_t my, int width);
    /* Weight/offset parameters are named in the same order as in
     * put_hevc_qpel_bi_w (wx0, wx1, ox0, ox1); all four are int, so the
     * function type is unaffected by the naming.
     * NOTE(review): confirm this matches the order the implementations use. */
    void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
                                         int16_t *src2,
                                         int height, int denom, int wx0, int wx1,
                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);

    /* Loop (deblocking) filters: h/v variants for luma and chroma.
     * Luma variants take beta and a tc array; chroma variants take only tc.
     * NOTE(review): no_p / no_q presumably flag sides of the edge that must
     * not be modified - confirm against the implementation. */
    void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                    int beta, int32_t *tc,
                                    uint8_t *no_p, uint8_t *no_q);
    void (*hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                    int beta, int32_t *tc,
                                    uint8_t *no_p, uint8_t *no_q);
    void (*hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
                                      int32_t *tc, uint8_t *no_p, uint8_t *no_q);
    void (*hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
                                      int32_t *tc, uint8_t *no_p, uint8_t *no_q);

    /* "_c" variants with signatures identical to the four filters above.
     * NOTE(review): their role is not visible in this header - presumably
     * they always point at the plain C implementations; confirm. */
    void (*hevc_h_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
                                      int beta, int32_t *tc,
                                      uint8_t *no_p, uint8_t *no_q);
    void (*hevc_v_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
                                      int beta, int32_t *tc,
                                      uint8_t *no_p, uint8_t *no_q);
    void (*hevc_h_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
                                        int32_t *tc, uint8_t *no_p,
                                        uint8_t *no_q);
    void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
                                        int32_t *tc, uint8_t *no_p,
                                        uint8_t *no_q);
} HEVCDSPContext;
|
|
|
|
|
|
|
|
/* Generic DSP initialisation entry point; takes the context and a bit depth.
 * NOTE(review): presumably fills every function pointer in hpc for the given
 * bit_depth - the definition is not visible in this header, confirm there. */
void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
|
|
|
|
|
2014-04-26 13:35:23 +00:00
|
|
|
/* Read-only 7 x 4 table of signed 8-bit values, defined in another
 * translation unit.
 * NOTE(review): presumably one row of 4 filter taps per fractional epel
 * position - confirm against the definition. */
extern const int8_t ff_hevc_epel_filters[7][4];
|
|
|
|
/* Read-only 3 x 16 table of signed 8-bit values, defined in another
 * translation unit.
 * NOTE(review): presumably one row per fractional qpel position; the row
 * length of 16 (rather than the tap count) should be confirmed against the
 * definition and its users. */
extern const int8_t ff_hevc_qpel_filters[3][16];
|
|
|
|
|
2014-07-26 22:19:25 +00:00
|
|
|
/* x86-specific DSP initialisation hook; same parameters as ff_hevc_dsp_init().
 * NOTE(review): presumably overrides entries of c with x86 SIMD versions
 * where available - confirm against the x86 init code. */
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
|
2015-02-05 06:22:19 +00:00
|
|
|
/* ARM-specific DSP initialisation hook; same parameters as ff_hevc_dsp_init().
 * NOTE(review): the name uses the "ff_hevcdsp_" prefix whereas the other
 * init functions in this header use "ff_hevc_dsp_"; renaming would have to
 * be coordinated with the definition and all callers. */
void ff_hevcdsp_init_arm(HEVCDSPContext *c, const int bit_depth);
|
2013-10-12 09:55:48 +00:00
|
|
|
#endif /* AVCODEC_HEVCDSP_H */
|