From fe1b5d974acf7736151e2e13f2498f4fbd6af765 Mon Sep 17 00:00:00 2001 From: David Conrad Date: Fri, 23 Jul 2010 21:46:17 +0000 Subject: [PATCH] Decode DCT tokens by branching to a different code path for each branch on the Huffman tree, instead of traversing the tree in a while loop. Based on the similar optimization in libvpx's detokenize.c. 10% faster at normal bitrates, and 30% faster for high-bitrate intra-only. Originally committed as revision 24468 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/vp56.h | 18 ++++++++++++ libavcodec/vp8.c | 69 ++++++++++++++++++++++++++++++-------------- libavcodec/vp8data.h | 20 ++----------- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h index b7c1887596..ad07a49e9b 100644 --- a/libavcodec/vp56.h +++ b/libavcodec/vp56.h @@ -226,6 +226,24 @@ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) return bit; } +// branchy variant, to be used where there's a branch based on the bit decoded +static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) +{ + unsigned long code_word = vp56_rac_renorm(c); + unsigned low = 1 + (((c->high - 1) * prob) >> 8); + unsigned low_shift = low << 8; + + if (code_word >= low_shift) { + c->high -= low; + c->code_word = code_word - low_shift; + return 1; + } + + c->high = low; + c->code_word = code_word; + return 0; +} + static inline int vp56_rac_get(VP56RangeCoder *c) { unsigned int code_word = vp56_rac_renorm(c); diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 81447c4fd8..acdaf56b03 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -800,36 +800,61 @@ static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], uint8_t probs[8][3][NUM_DCT_TOKENS-1], int i, int zero_nhood, int16_t qmul[2]) { - int token, nonzero = 0; - int offset = 0; + uint8_t *token_prob; + int nonzero = 0; + int coeff; - for (; i < 16; i++) { - token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, 
probs[vp8_coeff_band[i]][zero_nhood], offset); + do { + token_prob = probs[vp8_coeff_band[i]][zero_nhood]; - if (token == DCT_EOB) - break; - else if (token >= DCT_CAT1) { - int cat = token-DCT_CAT1; - token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); - token += 3 + (2<