mirror of https://git.ffmpeg.org/ffmpeg.git
optimizations
Originally committed as revision 1867 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
f138f88364
commit
4704097a2b
|
@ -95,3 +95,60 @@ const uint8_t ff_ue_golomb_len[256]={
|
||||||
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
||||||
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
|
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const uint8_t ff_interleaved_golomb_vlc_len[256]={
|
||||||
|
9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
|
||||||
|
9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||||
|
9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
|
||||||
|
9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||||
|
};
|
||||||
|
|
||||||
|
const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
|
||||||
|
15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5,
|
||||||
|
27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
|
||||||
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
const int8_t ff_interleaved_se_golomb_vlc_code[256]={
|
||||||
|
8, -8, 4, 4, 9, -9, -4, -4, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
10,-10, 5, 5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
|
12,-12, 6, 6, 13,-13, -6, -6, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||||
|
14,-14, 7, 7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
};
|
||||||
|
|
|
@ -32,6 +32,10 @@ extern const uint8_t ff_ue_golomb_vlc_code[512];
|
||||||
extern const int8_t ff_se_golomb_vlc_code[512];
|
extern const int8_t ff_se_golomb_vlc_code[512];
|
||||||
extern const uint8_t ff_ue_golomb_len[256];
|
extern const uint8_t ff_ue_golomb_len[256];
|
||||||
|
|
||||||
|
extern const uint8_t ff_interleaved_golomb_vlc_len[256];
|
||||||
|
extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
|
||||||
|
extern const int8_t ff_interleaved_se_golomb_vlc_code[256];
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* read unsigned exp golomb code.
|
* read unsigned exp golomb code.
|
||||||
|
@ -62,24 +66,33 @@ static inline int get_ue_golomb(GetBitContext *gb){
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int svq3_get_ue_golomb(GetBitContext *gb){
|
static inline int svq3_get_ue_golomb(GetBitContext *gb){
|
||||||
unsigned int buf;
|
uint32_t buf;
|
||||||
int log;
|
int log;
|
||||||
|
|
||||||
OPEN_READER(re, gb);
|
OPEN_READER(re, gb);
|
||||||
UPDATE_CACHE(re, gb);
|
UPDATE_CACHE(re, gb);
|
||||||
buf=GET_CACHE(re, gb)|1;
|
buf=GET_CACHE(re, gb);
|
||||||
|
|
||||||
|
if(buf&0xAA800000){
|
||||||
|
buf >>= 32 - 8;
|
||||||
|
LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
|
||||||
|
CLOSE_READER(re, gb);
|
||||||
|
|
||||||
|
return ff_interleaved_ue_golomb_vlc_code[buf];
|
||||||
|
}else{
|
||||||
|
buf|=1;
|
||||||
|
if((buf & 0xAAAAAAAA) == 0)
|
||||||
|
return INVALID_VLC;
|
||||||
|
|
||||||
if((buf & 0xAAAAAAAA) == 0)
|
for(log=31; (buf & 0x80000000) == 0; log--){
|
||||||
return INVALID_VLC;
|
buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
|
||||||
|
}
|
||||||
|
|
||||||
for(log=31; (buf & 0x80000000) == 0; log--){
|
LAST_SKIP_BITS(re, gb, 63 - 2*log);
|
||||||
buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
|
CLOSE_READER(re, gb);
|
||||||
|
|
||||||
|
return ((buf << log) >> log) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
LAST_SKIP_BITS(re, gb, 63 - 2*log);
|
|
||||||
CLOSE_READER(re, gb);
|
|
||||||
|
|
||||||
return ((buf << log) >> log) - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -141,19 +154,28 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){
|
||||||
|
|
||||||
OPEN_READER(re, gb);
|
OPEN_READER(re, gb);
|
||||||
UPDATE_CACHE(re, gb);
|
UPDATE_CACHE(re, gb);
|
||||||
buf=GET_CACHE(re, gb)|1;
|
buf=GET_CACHE(re, gb);
|
||||||
|
|
||||||
if((buf & 0xAAAAAAAA) == 0)
|
if(buf&0xAA800000){
|
||||||
return INVALID_VLC;
|
buf >>= 32 - 8;
|
||||||
|
LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
|
||||||
|
CLOSE_READER(re, gb);
|
||||||
|
|
||||||
|
return ff_interleaved_se_golomb_vlc_code[buf];
|
||||||
|
}else{
|
||||||
|
buf |=1;
|
||||||
|
if((buf & 0xAAAAAAAA) == 0)
|
||||||
|
return INVALID_VLC;
|
||||||
|
|
||||||
for(log=31; (buf & 0x80000000) == 0; log--){
|
for(log=31; (buf & 0x80000000) == 0; log--){
|
||||||
buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
|
buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
|
||||||
|
}
|
||||||
|
|
||||||
|
LAST_SKIP_BITS(re, gb, 63 - 2*log);
|
||||||
|
CLOSE_READER(re, gb);
|
||||||
|
|
||||||
|
return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
LAST_SKIP_BITS(re, gb, 63 - 2*log);
|
|
||||||
CLOSE_READER(re, gb);
|
|
||||||
|
|
||||||
return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef TRACE
|
#ifdef TRACE
|
||||||
|
|
|
@ -2278,13 +2278,19 @@ static void hl_decode_mb(H264Context *h){
|
||||||
|
|
||||||
|
|
||||||
if(!IS_INTRA4x4(mb_type)){
|
if(!IS_INTRA4x4(mb_type)){
|
||||||
for(i=0; i<16; i++){
|
if(s->codec_id == CODEC_ID_H264){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
|
for(i=0; i<16; i++){
|
||||||
uint8_t * const ptr= dest_y + h->block_offset[i];
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
|
||||||
if(s->codec_id == CODEC_ID_H264)
|
uint8_t * const ptr= dest_y + h->block_offset[i];
|
||||||
h264_add_idct_c(ptr, h->mb + i*16, linesize);
|
h264_add_idct_c(ptr, h->mb + i*16, linesize);
|
||||||
else
|
}
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(i=0; i<16; i++){
|
||||||
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
|
||||||
|
uint8_t * const ptr= dest_y + h->block_offset[i];
|
||||||
svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
|
svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2292,22 +2298,31 @@ static void hl_decode_mb(H264Context *h){
|
||||||
if(!(s->flags&CODEC_FLAG_GRAY)){
|
if(!(s->flags&CODEC_FLAG_GRAY)){
|
||||||
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
|
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
|
||||||
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
|
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
|
||||||
for(i=16; i<16+4; i++){
|
if(s->codec_id == CODEC_ID_H264){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
for(i=16; i<16+4; i++){
|
||||||
uint8_t * const ptr= dest_cb + h->block_offset[i];
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
||||||
if(s->codec_id == CODEC_ID_H264)
|
uint8_t * const ptr= dest_cb + h->block_offset[i];
|
||||||
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
|
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
|
||||||
else
|
}
|
||||||
svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
|
|
||||||
}
|
}
|
||||||
}
|
for(i=20; i<20+4; i++){
|
||||||
for(i=20; i<20+4; i++){
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
||||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
uint8_t * const ptr= dest_cr + h->block_offset[i];
|
||||||
uint8_t * const ptr= dest_cr + h->block_offset[i];
|
|
||||||
if(s->codec_id == CODEC_ID_H264)
|
|
||||||
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
|
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
|
||||||
else
|
}
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(i=16; i<16+4; i++){
|
||||||
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
||||||
|
uint8_t * const ptr= dest_cb + h->block_offset[i];
|
||||||
svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
|
svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(i=20; i<20+4; i++){
|
||||||
|
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
|
||||||
|
uint8_t * const ptr= dest_cr + h->block_offset[i];
|
||||||
|
svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -729,6 +729,13 @@ static int svq3_decode_frame (AVCodecContext *avctx,
|
||||||
while (get_bits (&s->gb, 1)) {
|
while (get_bits (&s->gb, 1)) {
|
||||||
get_bits (&s->gb, 8);
|
get_bits (&s->gb, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(avctx->debug&FF_DEBUG_PICT_INFO){
|
||||||
|
printf("%c hpel:%d, tpel:%d aqp:%d qp:%d\n",
|
||||||
|
ff_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
|
||||||
|
s->adaptive_quant, s->qscale
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/* B-frames are not supported */
|
/* B-frames are not supported */
|
||||||
if (s->pict_type == B_TYPE/* && avctx->hurry_up*/)
|
if (s->pict_type == B_TYPE/* && avctx->hurry_up*/)
|
||||||
|
|
Loading…
Reference in New Issue