vc2enc: halve allocated table size, refactor and optimize quantization

Since coefficients differ only in the last bit when writing to the
bitstream it was possible to remove the sign from the tables, thus
halving them. Also now all quantization is done in the unsigned domain
as the sign is completely separate, which gets rid of the need to do
quantization on 32 bit signed integers.

Overall, this slightly speeds up the encoder depending on the machine.
The commit still generates bit-identical files as before the commit.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
This commit is contained in:
Rostislav Pehlivanov 2016-02-26 14:47:06 +00:00
parent 7db2e757c5
commit 2b811e4605
2 changed files with 50 additions and 59 deletions

View File

@ -32,6 +32,8 @@
/* Quantizations above this usually zero coefficients and lower the quality */
#define MAX_QUANT_INDEX 50
/* Total range is -COEF_LUT_TAB to +COEFF_LUT_TAB, but total tab size is half
* (COEF_LUT_TAB*MAX_QUANT_INDEX) since the sign is appended during encoding */
#define COEF_LUT_TAB 2048
enum VC2_QM {
@ -104,7 +106,7 @@ typedef struct VC2EncContext {
uint8_t quant[MAX_DWT_LEVELS][4];
/* Coefficient LUT */
uint32_t *coef_lut_val;
uint16_t *coef_lut_val;
uint8_t *coef_lut_len;
int num_x; /* #slices horizontally */
@ -147,7 +149,7 @@ static av_always_inline void put_padding(PutBitContext *pb, int bytes)
put_bits(pb, bits, 0);
}
static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint16_t val)
{
int i;
int pbits = 0, bits = 0, topbit = 1, maxval = 1;
@ -192,7 +194,7 @@ static av_always_inline int count_vc2_ue_uint(uint16_t val)
}
static av_always_inline void get_vc2_ue_uint(uint16_t val, uint8_t *nbits,
uint32_t *eval)
uint16_t *eval)
{
int i;
int pbits = 0, bits = 0, topbit = 1, maxval = 1;
@ -539,29 +541,7 @@ static void encode_picture_start(VC2EncContext *s)
encode_wavelet_transform(s);
}
#define QUANT(c) \
c <<= 2; \
c /= qfactor; \
static av_always_inline void coeff_quantize_get(qcoef coeff, int qfactor,
uint8_t *len, uint32_t *eval)
{
QUANT(coeff)
get_vc2_ue_uint(FFABS(coeff), len, eval);
if (coeff) {
*eval = (*eval << 1) | (coeff < 0);
*len += 1;
}
}
static av_always_inline void coeff_quantize_encode(PutBitContext *pb, qcoef coeff,
int qfactor)
{
QUANT(coeff)
put_vc2_ue_uint(pb, FFABS(coeff));
if (coeff)
put_bits(pb, 1, coeff < 0);
}
#define QUANT(c, qf) (((c) << 2)/(qf))
/* VC-2 13.5.5.2 - slice_band() */
static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
@ -569,23 +549,33 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
{
int x, y;
int left = b->width * (sx+0) / s->num_x;
int right = b->width * (sx+1) / s->num_x;
int top = b->height * (sy+0) / s->num_y;
int bottom = b->height * (sy+1) / s->num_y;
const int left = b->width * (sx+0) / s->num_x;
const int right = b->width * (sx+1) / s->num_x;
const int top = b->height * (sy+0) / s->num_y;
const int bottom = b->height * (sy+1) / s->num_y;
int qfactor = ff_dirac_qscale_tab[quant];
uint8_t *len_lut = &s->coef_lut_len[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
uint32_t *val_lut = &s->coef_lut_val[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
const int qfactor = ff_dirac_qscale_tab[quant];
const uint8_t *len_lut = &s->coef_lut_len[quant*COEF_LUT_TAB];
const uint16_t *val_lut = &s->coef_lut_val[quant*COEF_LUT_TAB];
dwtcoef *coeff = b->buf + top * b->stride;
for (y = top; y < bottom; y++) {
for (x = left; x < right; x++) {
if (coeff[x] >= -COEF_LUT_TAB && coeff[x] < COEF_LUT_TAB)
put_bits(pb, len_lut[coeff[x]], val_lut[coeff[x]]);
else
coeff_quantize_encode(pb, coeff[x], qfactor);
const int neg = coeff[x] < 0;
uint16_t c_abs = FFABS(coeff[x]);
if (c_abs < COEF_LUT_TAB) {
const uint8_t len = len_lut[c_abs];
if (len == 1)
put_bits(pb, 1, 1);
else
put_bits(pb, len + 1, (val_lut[c_abs] << 1) | neg);
} else {
c_abs = QUANT(c_abs, qfactor);
put_vc2_ue_uint(pb, c_abs);
if (c_abs)
put_bits(pb, 1, neg);
}
}
coeff += b->stride;
}
@ -594,7 +584,7 @@ static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
static int count_hq_slice(VC2EncContext *s, int slice_x,
int slice_y, int quant_idx)
{
int x, y, left, right, top, bottom, qfactor;
int x, y;
uint8_t quants[MAX_DWT_LEVELS][4];
int bits = 0, p, level, orientation;
@ -611,28 +601,29 @@ static int count_hq_slice(VC2EncContext *s, int slice_x,
bits += 8;
for (level = 0; level < s->wavelet_depth; level++) {
for (orientation = !!level; orientation < 4; orientation++) {
dwtcoef *buf;
SubBand *b = &s->plane[p].band[level][orientation];
quant_idx = quants[level][orientation];
qfactor = ff_dirac_qscale_tab[quant_idx];
const int q_idx = quants[level][orientation];
const uint8_t *len_lut = &s->coef_lut_len[q_idx*COEF_LUT_TAB];
const int qfactor = ff_dirac_qscale_tab[q_idx];
left = b->width * slice_x / s->num_x;
right = b->width *(slice_x+1) / s->num_x;
top = b->height * slice_y / s->num_y;
bottom = b->height *(slice_y+1) / s->num_y;
const int left = b->width * slice_x / s->num_x;
const int right = b->width *(slice_x+1) / s->num_x;
const int top = b->height * slice_y / s->num_y;
const int bottom = b->height *(slice_y+1) / s->num_y;
buf = b->buf + top * b->stride;
dwtcoef *buf = b->buf + top * b->stride;
for (y = top; y < bottom; y++) {
for (x = left; x < right; x++) {
qcoef coeff = (qcoef)buf[x];
if (coeff >= -COEF_LUT_TAB && coeff < COEF_LUT_TAB) {
bits += s->coef_lut_len[2*quant_idx*COEF_LUT_TAB + coeff + COEF_LUT_TAB];
uint16_t c_abs = FFABS(buf[x]);
if (c_abs < COEF_LUT_TAB) {
const int len = len_lut[c_abs];
bits += len + (len != 1);
} else {
QUANT(coeff)
bits += count_vc2_ue_uint(FFABS(coeff));
bits += !!coeff;
c_abs = QUANT(c_abs, qfactor);
bits += count_vc2_ue_uint(c_abs);
bits += !!c_abs;
}
}
buf += b->stride;
@ -1122,19 +1113,20 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx)
goto alloc_fail;
/* Lookup tables */
s->coef_lut_len = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
s->coef_lut_len = av_malloc(COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
if (!s->coef_lut_len)
goto alloc_fail;
s->coef_lut_val = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));
s->coef_lut_val = av_malloc(COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));
if (!s->coef_lut_val)
goto alloc_fail;
for (i = 0; i < s->q_ceil; i++) {
for (j = -COEF_LUT_TAB; j < COEF_LUT_TAB; j++) {
uint8_t *len_lut = &s->coef_lut_len[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
uint32_t *val_lut = &s->coef_lut_val[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
coeff_quantize_get(j, ff_dirac_qscale_tab[i], &len_lut[j], &val_lut[j]);
for (j = 0; j < COEF_LUT_TAB; j++) {
uint8_t *len_lut = &s->coef_lut_len[i*COEF_LUT_TAB];
uint16_t *val_lut = &s->coef_lut_val[i*COEF_LUT_TAB];
get_vc2_ue_uint(QUANT(j, ff_dirac_qscale_tab[i]),
&len_lut[j], &val_lut[j]);
}
}

View File

@ -26,7 +26,6 @@
#include <stdint.h>
typedef int16_t dwtcoef;
typedef int32_t qcoef; /* Quantization needs more precision */
/* Only Deslauriers-Dubuc (9,7) and LeGall (5,3) supported! */