mirror of https://git.ffmpeg.org/ffmpeg.git
Decode DCT tokens by branching to a different code path for each branch
on the Huffman tree, instead of traversing the tree in a while loop. Based on the similar optimization in libvpx's detokenize.c. 10% faster at normal bitrates, and 30% faster for high-bitrate intra-only. Originally committed as revision 24468 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
5474ec2ac8
commit
fe1b5d974a
|
@ -226,6 +226,24 @@ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
|
|||
return bit;
|
||||
}
|
||||
|
||||
// branchy variant, to be used where there's a branch based on the bit decoded
//
// Decodes one bit from the range coder state in c and returns it (0 or 1).
// The current range c->high is split at
//     low = 1 + (((c->high - 1) * prob) >> 8)
// and the code word is compared against that split point:
//   - code word at or above the split: returns 1, c->high shrinks by low and
//     the split is subtracted from the stored code word;
//   - code word below the split: returns 0 and c->high becomes low.
//
// c    - range coder state; c->high and c->code_word are both updated.
// prob - weight applied to the range in 1/256 steps (presumably the
//        probability of decoding a 0 -- TODO confirm against callers).
|
||||
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
|
||||
{
|
||||
unsigned long code_word = vp56_rac_renorm(c); // presumably renormalizes c and yields the current code word -- helper not visible here
|
||||
unsigned low = 1 + (((c->high - 1) * prob) >> 8); // split point inside the current range
|
||||
unsigned low_shift = low << 8; // split point scaled up by 8 bits for the comparison with code_word
|
||||
|
||||
if (code_word >= low_shift) { // code word lies at or above the split: bit is 1
|
||||
c->high -= low; // keep only the part of the range above the split
|
||||
c->code_word = code_word - low_shift; // rebase the code word to the new range
|
||||
return 1;
|
||||
}
|
||||
|
||||
// code word lies below the split: bit is 0, the range becomes [0, low)
c->high = low;
|
||||
c->code_word = code_word; // store back the value obtained from vp56_rac_renorm()
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int vp56_rac_get(VP56RangeCoder *c)
|
||||
{
|
||||
unsigned int code_word = vp56_rac_renorm(c);
|
||||
|
|
|
@ -800,36 +800,61 @@ static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
|
|||
uint8_t probs[8][3][NUM_DCT_TOKENS-1],
|
||||
int i, int zero_nhood, int16_t qmul[2])
|
||||
{
|
||||
int token, nonzero = 0;
|
||||
int offset = 0;
|
||||
uint8_t *token_prob;
|
||||
int nonzero = 0;
|
||||
int coeff;
|
||||
|
||||
for (; i < 16; i++) {
|
||||
token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset);
|
||||
do {
|
||||
token_prob = probs[vp8_coeff_band[i]][zero_nhood];
|
||||
|
||||
if (token == DCT_EOB)
|
||||
break;
|
||||
else if (token >= DCT_CAT1) {
|
||||
int cat = token-DCT_CAT1;
|
||||
token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
|
||||
token += 3 + (2<<cat);
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
|
||||
return nonzero;
|
||||
|
||||
skip_eob:
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
|
||||
zero_nhood = 0;
|
||||
token_prob = probs[vp8_coeff_band[++i]][0];
|
||||
if (i < 16)
|
||||
goto skip_eob;
|
||||
return nonzero; // invalid input; blocks should end with EOB
|
||||
}
|
||||
|
||||
// after the first token, the non-zero prediction context becomes
|
||||
// based on the last decoded coeff
|
||||
if (!token) {
|
||||
zero_nhood = 0;
|
||||
offset = 1;
|
||||
continue;
|
||||
} else if (token == 1)
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
|
||||
coeff = 1;
|
||||
zero_nhood = 1;
|
||||
else
|
||||
} else {
|
||||
zero_nhood = 2;
|
||||
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
|
||||
coeff = vp56_rac_get_prob(c, token_prob[4]);
|
||||
if (coeff)
|
||||
coeff += vp56_rac_get_prob(c, token_prob[5]);
|
||||
coeff += 2;
|
||||
} else {
|
||||
// DCT_CAT*
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
|
||||
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
|
||||
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
|
||||
} else { // DCT_CAT2
|
||||
coeff = 7;
|
||||
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
|
||||
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
|
||||
}
|
||||
} else { // DCT_CAT3 and up
|
||||
int a = vp56_rac_get_prob(c, token_prob[8]);
|
||||
int b = vp56_rac_get_prob(c, token_prob[9+a]);
|
||||
int cat = (a<<1) + b;
|
||||
coeff = 3 + (8<<cat);
|
||||
coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// todo: full [16] qmat? load into register?
|
||||
block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i];
|
||||
nonzero = i+1;
|
||||
offset = 0;
|
||||
}
|
||||
block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
|
||||
nonzero = ++i;
|
||||
} while (i < 16);
|
||||
|
||||
return nonzero;
|
||||
}
|
||||
|
||||
|
|
|
@ -329,21 +329,6 @@ static const uint8_t vp8_coeff_band[16] =
|
|||
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
|
||||
};
|
||||
|
||||
static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] =
|
||||
{
|
||||
{ -DCT_EOB, 1 }, // '0'
|
||||
{ -DCT_0, 2 }, // '10'
|
||||
{ -DCT_1, 3 }, // '110'
|
||||
{ 4, 6 },
|
||||
{ -DCT_2, 5 }, // '11100'
|
||||
{ -DCT_3, -DCT_4 }, // '111010', '111011'
|
||||
{ 7, 8 },
|
||||
{ -DCT_CAT1, -DCT_CAT2 }, // '111100', '111101'
|
||||
{ 9, 10 },
|
||||
{ -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101'
|
||||
{ -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111'
|
||||
};
|
||||
|
||||
static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 };
|
||||
static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 };
|
||||
static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 };
|
||||
|
@ -351,10 +336,9 @@ static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 };
|
|||
static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
|
||||
static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
|
||||
|
||||
static const uint8_t * const vp8_dct_cat_prob[6] =
|
||||
// only used for cat3 and above; cat 1 and 2 are referenced directly
|
||||
static const uint8_t * const vp8_dct_cat_prob[] =
|
||||
{
|
||||
vp8_dct_cat1_prob,
|
||||
vp8_dct_cat2_prob,
|
||||
vp8_dct_cat3_prob,
|
||||
vp8_dct_cat4_prob,
|
||||
vp8_dct_cat5_prob,
|
||||
|
|
Loading…
Reference in New Issue