vp8: move block coeff arithcoder onto the stack.

This prevents gcc from assuming that its contents may have changed
between calls to vp56_rac_get_prob(), thus avoiding countless (and
unnecessary) movs. Decoding of sintel trailer goes from (avg+SG) 9.796
+/- 0.003 to 9.635 +/- 0.010.
This commit is contained in:
Ronald S. Bultje 2012-05-29 10:56:15 -07:00
parent 64bde80563
commit 6163d880c0
1 changed files with 20 additions and 18 deletions

View File

@ -707,56 +707,58 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
* @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one
*/
// NOTE(review): this span is a rendered diff without +/- markers. Where a
// statement appears twice in a row, the first copy (range coder passed as `c`,
// early exits via `return i`) looks like the removed line and the second copy
// (range coder passed as `&c`, early exits via `break`) the added line.
//
// Decodes coefficient tokens for one block, starting at index i. Each decoded
// nonzero coefficient is stored at block[zigzag_scan[i]], scaled by qmul[0]
// for index 0 and by qmul[1] for every other index. Returns the final value
// of i.
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
uint8_t probs[16][3][NUM_DCT_TOKENS-1],
int i, uint8_t *token_prob, int16_t qmul[2])
{
// Work on a local copy of the caller's range coder; it is written back
// through r after the loop. Per the commit message, this lets gcc avoid
// reloading the coder state between calls.
VP56RangeCoder c = *r;
// The first iteration enters the loop past the EOB test.
goto skip_eob;
do {
int coeff;
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
return i;
if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
// break (not return) so the copy-back after the loop still runs
break;
skip_eob:
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
if (++i == 16)
return i; // invalid input; blocks should end with EOB
break; // invalid input; blocks should end with EOB
// After a zero token, use probability set 0 of the (already advanced)
// position and skip the EOB test.
token_prob = probs[i][0];
goto skip_eob;
}
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
coeff = 1;
// After a coefficient of 1, the next position uses probability set 1.
token_prob = probs[i+1][1];
} else {
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
if (coeff)
coeff += vp56_rac_get_prob(c, token_prob[5]);
coeff += vp56_rac_get_prob(&c, token_prob[5]);
coeff += 2;
} else {
// DCT_CAT*
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
} else { // DCT_CAT2
coeff = 7;
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
}
} else { // DCT_CAT3 and up
int a = vp56_rac_get_prob(c, token_prob[8]);
int b = vp56_rac_get_prob(c, token_prob[9+a]);
int a = vp56_rac_get_prob(&c, token_prob[8]);
int b = vp56_rac_get_prob(&c, token_prob[9+a]);
// cat combines the two decoded values and selects both the base value
// 3 + (8<<cat) and the probability table for the remaining bits.
int cat = (a<<1) + b;
coeff = 3 + (8<<cat);
coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
}
}
// After a coefficient larger than 1, the next position uses probability set 2.
token_prob = probs[i+1][2];
}
// vp8_rac_get() supplies the sign: a nonzero result negates coeff.
block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
} while (++i < 16);
// Publish the advanced coder state back to the caller.
*r = c;
return i;
}
#endif