vp8: move block coeff arithcoder onto the stack.

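The range coder state is copied into a local variable on entry and
written back through the pointer on exit; the early returns become
breaks so that every exit path reaches the write-back. This prevents
gcc from assuming that its contents may have changed between calls to
vp56_rac_get_prob(), thus avoiding countless (and unnecessary) movs.
Decoding of sintel trailer goes from (avg+SG) 9.796 +/- 0.003 to
9.635 +/- 0.010.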
This commit is contained in:
Ronald S. Bultje 2012-05-29 10:56:15 -07:00
parent 64bde80563
commit 6163d880c0
1 changed files with 20 additions and 18 deletions

View File

@@ -707,56 +707,58 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
* @return 0 if no coeffs were decoded * @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one * otherwise, the index of the last coeff decoded plus one
*/ */
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16], static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
uint8_t probs[16][3][NUM_DCT_TOKENS-1], uint8_t probs[16][3][NUM_DCT_TOKENS-1],
int i, uint8_t *token_prob, int16_t qmul[2]) int i, uint8_t *token_prob, int16_t qmul[2])
{ {
VP56RangeCoder c = *r;
goto skip_eob; goto skip_eob;
do { do {
int coeff; int coeff;
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
return i; break;
skip_eob: skip_eob:
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
if (++i == 16) if (++i == 16)
return i; // invalid input; blocks should end with EOB break; // invalid input; blocks should end with EOB
token_prob = probs[i][0]; token_prob = probs[i][0];
goto skip_eob; goto skip_eob;
} }
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
coeff = 1; coeff = 1;
token_prob = probs[i+1][1]; token_prob = probs[i+1][1];
} else { } else {
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
coeff = vp56_rac_get_prob_branchy(c, token_prob[4]); coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
if (coeff) if (coeff)
coeff += vp56_rac_get_prob(c, token_prob[5]); coeff += vp56_rac_get_prob(&c, token_prob[5]);
coeff += 2; coeff += 2;
} else { } else {
// DCT_CAT* // DCT_CAT*
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
} else { // DCT_CAT2 } else { // DCT_CAT2
coeff = 7; coeff = 7;
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
} }
} else { // DCT_CAT3 and up } else { // DCT_CAT3 and up
int a = vp56_rac_get_prob(c, token_prob[8]); int a = vp56_rac_get_prob(&c, token_prob[8]);
int b = vp56_rac_get_prob(c, token_prob[9+a]); int b = vp56_rac_get_prob(&c, token_prob[9+a]);
int cat = (a<<1) + b; int cat = (a<<1) + b;
coeff = 3 + (8<<cat); coeff = 3 + (8<<cat);
coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]); coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
} }
} }
token_prob = probs[i+1][2]; token_prob = probs[i+1][2];
} }
block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
} while (++i < 16); } while (++i < 16);
*r = c;
return i; return i;
} }
#endif #endif