mirror of https://git.ffmpeg.org/ffmpeg.git
diracdec: rewrite golomb reader
This version is able to output multiple coefficients at a time and is able to altogether remove actual Golomb code parsing. It's also able to partially recover the last coefficient in case the packet is incomplete. Total decoder performance gain for 8bit 420 1080p lossless: 40%. Total decoder performance gain for 10bit 420 1080p lossless: 40%. clang was able to vectorize the loop much better than my handwritten assembly, but gcc was very naive and didn't. Lookup table is a rewritten version of vc2hqdecode.
This commit is contained in:
parent
d778be6e4a
commit
675bb1f4f9
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,4 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Open Broadcast Systems Ltd.
|
||||
* Author 2016 Rostislav Pehlivanov <rpehlivanov@obe.tv>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
|
@ -24,28 +21,9 @@
|
|||
|
||||
#include "libavutil/avutil.h"
|
||||
|
||||
/* Can be 32 bits wide for some performance gain on some machines, but it will
|
||||
* incorrectly decode very long coefficients (usually only 1 or 2 per frame) */
|
||||
typedef uint64_t residual;
|
||||
|
||||
#define LUT_BITS 8
|
||||
|
||||
/* Exactly 64 bytes */
|
||||
typedef struct DiracGolombLUT {
|
||||
residual preamble, leftover;
|
||||
int32_t ready[LUT_BITS];
|
||||
int32_t preamble_bits, leftover_bits, ready_num;
|
||||
int8_t need_s, sign;
|
||||
} DiracGolombLUT;
|
||||
|
||||
av_cold int ff_dirac_golomb_reader_init(DiracGolombLUT **lut_ctx);
|
||||
|
||||
int ff_dirac_golomb_read_32bit(DiracGolombLUT *lut_ctx, const uint8_t *buf,
|
||||
int bytes, uint8_t *dst, int coeffs);
|
||||
|
||||
int ff_dirac_golomb_read_16bit(DiracGolombLUT *lut_ctx, const uint8_t *buf,
|
||||
int bytes, uint8_t *_dst, int coeffs);
|
||||
|
||||
av_cold void ff_dirac_golomb_reader_end(DiracGolombLUT **lut_ctx);
|
||||
int ff_dirac_golomb_read_16bit(const uint8_t *buf, int bytes,
|
||||
uint8_t *_dst, int coeffs);
|
||||
int ff_dirac_golomb_read_32bit(const uint8_t *buf, int bytes,
|
||||
uint8_t *_dst, int coeffs);
|
||||
|
||||
#endif /* AVCODEC_DIRAC_VLC_H */
|
||||
|
|
|
@ -136,7 +136,6 @@ typedef struct DiracContext {
|
|||
MpegvideoEncDSPContext mpvencdsp;
|
||||
VideoDSPContext vdsp;
|
||||
DiracDSPContext diracdsp;
|
||||
DiracGolombLUT *reader_ctx;
|
||||
DiracVersionInfo version;
|
||||
GetBitContext gb;
|
||||
AVDiracSeqHeader seq;
|
||||
|
@ -395,7 +394,6 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
|
|||
s->threads_num_buf = -1;
|
||||
s->thread_buf_size = -1;
|
||||
|
||||
ff_dirac_golomb_reader_init(&s->reader_ctx);
|
||||
ff_diracdsp_init(&s->diracdsp);
|
||||
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
|
||||
ff_videodsp_init(&s->vdsp, 8);
|
||||
|
@ -428,8 +426,6 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx)
|
|||
DiracContext *s = avctx->priv_data;
|
||||
int i;
|
||||
|
||||
ff_dirac_golomb_reader_end(&s->reader_ctx);
|
||||
|
||||
dirac_decode_flush(avctx);
|
||||
for (i = 0; i < MAX_FRAMES; i++)
|
||||
av_frame_free(&s->all_frames[i].avframe);
|
||||
|
@ -881,11 +877,11 @@ static int decode_hq_slice(DiracContext *s, DiracSlice *slice, uint8_t *tmp_buf)
|
|||
coef_num = subband_coeffs(s, slice->slice_x, slice->slice_y, i, coeffs_num);
|
||||
|
||||
if (s->pshift)
|
||||
coef_par = ff_dirac_golomb_read_32bit(s->reader_ctx, addr,
|
||||
length, tmp_buf, coef_num);
|
||||
coef_par = ff_dirac_golomb_read_32bit(addr, length,
|
||||
tmp_buf, coef_num);
|
||||
else
|
||||
coef_par = ff_dirac_golomb_read_16bit(s->reader_ctx, addr,
|
||||
length, tmp_buf, coef_num);
|
||||
coef_par = ff_dirac_golomb_read_16bit(addr, length,
|
||||
tmp_buf, coef_num);
|
||||
|
||||
if (coef_num > coef_par) {
|
||||
const int start_b = coef_par * (1 << (s->pshift + 1));
|
||||
|
|
Loading…
Reference in New Issue