New bitstream reader API (the old get_bits()-based one is emulated and will still be supported in the future because it is simpler)

minor optimizations to get_vlc

Originally committed as revision 725 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2002-07-09 10:35:10 +00:00
parent 436483c2ec
commit 8db1a1dde0
4 changed files with 344 additions and 525 deletions

View File

@ -118,101 +118,39 @@ void put_string(PutBitContext * pbc, char *s)
/* bit input functions */
void init_get_bits(GetBitContext *s,
void init_get_bits(GetBitContext *s,
UINT8 *buffer, int buffer_size)
{
s->buffer= buffer;
s->size= buffer_size;
s->buffer_end= buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index=0;
s->buffer= buffer;
#else
s->buf = buffer;
s->buf_ptr = buffer;
s->buf_end = buffer + buffer_size;
s->bit_cnt = 0;
s->bit_buf = 0;
while (s->buf_ptr < s->buf_end &&
s->bit_cnt < 32) {
s->bit_buf |= (*s->buf_ptr++ << (24 - s->bit_cnt));
s->bit_cnt += 8;
}
#elif defined LIBMPEG2_BITSTREAM_READER
s->buffer_ptr = buffer;
s->bit_count = 16;
s->cache = 0;
#elif defined A32_BITSTREAM_READER
s->buffer_ptr = (uint32_t*)buffer;
s->bit_count = 32;
s->cache0 = 0;
s->cache1 = 0;
#endif
s->size= buffer_size;
}
#ifndef ALT_BITSTREAM_READER
/* n must be >= 1 and <= 32 */
/* also true: n > s->bit_cnt */
unsigned int get_bits_long(GetBitContext *s, int n)
{
unsigned int val;
int bit_cnt;
unsigned int bit_buf;
#ifdef STATS
st_bit_counts[st_current_index] += n;
#endif
bit_buf = s->bit_buf;
bit_cnt = s->bit_cnt - n;
// if (bit_cnt >= 0) {
// val = bit_buf >> (32 - n);
// bit_buf <<= n;
// } else
{
UINT8 *buf_ptr;
val = bit_buf >> (32 - n);
buf_ptr = s->buf_ptr;
buf_ptr += 4;
/* handle common case: we can read everything */
if (buf_ptr <= s->buf_end) {
#ifdef ARCH_X86
bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4])));
#else
bit_buf = (buf_ptr[-4] << 24) |
(buf_ptr[-3] << 16) |
(buf_ptr[-2] << 8) |
(buf_ptr[-1]);
#endif
val |= bit_buf >> (32 + bit_cnt);
bit_buf <<= - bit_cnt;
bit_cnt += 32;
} else {
buf_ptr -= 4;
bit_buf = 0;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 24;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 16;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 8;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++;
val |= bit_buf >> (32 + bit_cnt);
bit_buf <<= - bit_cnt;
bit_cnt += 8*(buf_ptr - s->buf_ptr);
if(bit_cnt<0) bit_cnt=0;
}
s->buf_ptr = buf_ptr;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
// UPDATE_CACHE(re, s)
CLOSE_READER(re, s)
}
s->bit_buf = bit_buf;
s->bit_cnt = bit_cnt;
return val;
}
#ifdef A32_BITSTREAM_READER
s->cache1 = 0;
#endif
}
void align_get_bits(GetBitContext *s)
{
#ifdef ALT_BITSTREAM_READER
s->index= (s->index + 7) & (~7);
#else
int n;
n = s->bit_cnt & 7;
if (n > 0) {
get_bits(s, n);
}
#endif
int n= (-get_bits_count(s)) & 7;
if(n) skip_bits(s, n);
}
int check_marker(GetBitContext *s, char *msg)
@ -223,55 +161,6 @@ int check_marker(GetBitContext *s, char *msg)
return bit;
}
#ifndef ALT_BITSTREAM_READER
/* This function is identical to get_bits_long(), the */
/* only difference is that it doesn't touch the buffer */
/* it is useful to see the buffer. */
unsigned int show_bits_long(GetBitContext *s, int n)
{
unsigned int val;
int bit_cnt;
unsigned int bit_buf;
UINT8 *buf_ptr;
bit_buf = s->bit_buf;
bit_cnt = s->bit_cnt - n;
val = bit_buf >> (32 - n);
buf_ptr = s->buf_ptr;
buf_ptr += 4;
/* handle common case: we can read everything */
if (buf_ptr <= s->buf_end) {
#ifdef ARCH_X86
bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4])));
#else
bit_buf = (buf_ptr[-4] << 24) |
(buf_ptr[-3] << 16) |
(buf_ptr[-2] << 8) |
(buf_ptr[-1]);
#endif
} else {
buf_ptr -= 4;
bit_buf = 0;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 24;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 16;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++ << 8;
if (buf_ptr < s->buf_end)
bit_buf |= *buf_ptr++;
}
val |= bit_buf >> (32 + bit_cnt);
bit_buf <<= - bit_cnt;
bit_cnt += 32;
return val;
}
#endif
/* VLC decoding */
//#define DEBUG_VLC
@ -300,18 +189,15 @@ static int alloc_table(VLC *vlc, int size)
vlc->table_size += size;
if (vlc->table_size > vlc->table_allocated) {
vlc->table_allocated += (1 << vlc->bits);
vlc->table_bits = realloc(vlc->table_bits,
sizeof(INT8) * vlc->table_allocated);
vlc->table_codes = realloc(vlc->table_codes,
sizeof(INT16) * vlc->table_allocated);
if (!vlc->table_bits ||
!vlc->table_codes)
vlc->table = realloc(vlc->table,
sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
if (!vlc->table)
return -1;
}
return index;
}
static int build_table(VLC *vlc, int table_nb_bits,
static int build_table(VLC *vlc, int table_nb_bits,
int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
@ -319,23 +205,21 @@ static int build_table(VLC *vlc, int table_nb_bits,
{
int i, j, k, n, table_size, table_index, nb, n1, index;
UINT32 code;
INT8 *table_bits;
INT16 *table_codes;
VLC_TYPE (*table)[2];
table_size = 1 << table_nb_bits;
table_index = alloc_table(vlc, table_size);
#ifdef DEBUG_VLC
printf("new table index=%d size=%d code_prefix=%x n=%d\n",
printf("new table index=%d size=%d code_prefix=%x n=%d\n",
table_index, table_size, code_prefix, n_prefix);
#endif
if (table_index < 0)
return -1;
table_bits = &vlc->table_bits[table_index];
table_codes = &vlc->table_codes[table_index];
table = &vlc->table[table_index];
for(i=0;i<table_size;i++) {
table_bits[i] = 0;
table_codes[i] = -1;
table[i][1] = 0; //bits
table[i][0] = -1; //codes
}
/* first pass: map codes and compute auxiliary table sizes */
@ -360,12 +244,12 @@ static int build_table(VLC *vlc, int table_nb_bits,
printf("%4x: code=%d n=%d\n",
j, i, n);
#endif
if (table_bits[j] != 0) {
if (table[j][1] /*bits*/ != 0) {
fprintf(stderr, "incorrect codes\n");
exit(1);
}
table_bits[j] = n;
table_codes[j] = i;
table[j][1] = n; //bits
table[j][0] = i; //code
j++;
}
} else {
@ -376,22 +260,22 @@ static int build_table(VLC *vlc, int table_nb_bits,
j, n);
#endif
/* compute table size */
n1 = -table_bits[j];
n1 = -table[j][1]; //bits
if (n > n1)
n1 = n;
table_bits[j] = -n1;
table[j][1] = -n1; //bits
}
}
}
/* second pass: fill auxiliary tables recursively */
for(i=0;i<table_size;i++) {
n = table_bits[i];
n = table[i][1]; //bits
if (n < 0) {
n = -n;
if (n > table_nb_bits) {
n = table_nb_bits;
table_bits[i] = -n;
table[i][1] = -n; //bits
}
index = build_table(vlc, n, nb_codes,
bits, bits_wrap, bits_size,
@ -401,9 +285,8 @@ static int build_table(VLC *vlc, int table_nb_bits,
if (index < 0)
return -1;
/* note: realloc has been done, so reload tables */
table_bits = &vlc->table_bits[table_index];
table_codes = &vlc->table_codes[table_index];
table_codes[i] = index;
table = &vlc->table[table_index];
table[i][0] = index; //code
}
}
return table_index;
@ -436,8 +319,7 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
const void *codes, int codes_wrap, int codes_size)
{
vlc->bits = nb_bits;
vlc->table_bits = NULL;
vlc->table_codes = NULL;
vlc->table = NULL;
vlc->table_allocated = 0;
vlc->table_size = 0;
#ifdef DEBUG_VLC
@ -448,8 +330,7 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
bits, bits_wrap, bits_size,
codes, codes_wrap, codes_size,
0, 0) < 0) {
av_free(vlc->table_bits);
av_free(vlc->table_codes);
av_free(vlc->table);
return -1;
}
return 0;
@ -458,8 +339,7 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
void free_vlc(VLC *vlc)
{
av_free(vlc->table_bits);
av_free(vlc->table_codes);
av_free(vlc->table);
}
int ff_gcd(int a, int b){

View File

@ -11,13 +11,17 @@
//#define ALT_BITSTREAM_WRITER
//#define ALIGNED_BITSTREAM_WRITER
#define ALT_BITSTREAM_READER
//#define LIBMPEG2_BITSTREAM_READER
//#define A32_BITSTREAM_READER
#ifdef ARCH_ALPHA
#define ALT_BITSTREAM_READER
#endif
//#define ALIGNED_BITSTREAM
#define FAST_GET_FIRST_VLC
//#define DUMP_STREAM // only works with the ALT_BITSTREAM_READER
//#define DUMP_STREAM
#ifdef HAVE_AV_CONFIG_H
/* only include the following when compiling package */
@ -161,6 +165,13 @@ inline void dprintf(const char* fmt,...) {}
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
/* INV32(a): shift count used by the SHOW_UBITS / SHOW_SBITS macros to keep
   the top `a` bits of a 32 bit cache, i.e. the "32 - a" of "cache >> (32 - a)".
   The argument is parenthesized so that expressions such as INV32(n + 1)
   expand correctly. */
#ifdef ARCH_X86
// inverse for shift optimization (gcc should do that ...)
// NOTE(review): presumably relies on x86 shift instructions using only the
// low 5 bits of the count, so that -a behaves like 32 - a -- confirm.
#define INV32(a) (-(a))
#else
#define INV32(a) (32-(a))
#endif
/* bit output */
struct PutBitContext;
@ -195,23 +206,29 @@ void jflush_put_bits(PutBitContext *s);
/* bit input */
typedef struct GetBitContext {
UINT8 *buffer, *buffer_end;
#ifdef ALT_BITSTREAM_READER
int index;
UINT8 *buffer;
#else
UINT32 bit_buf;
int bit_cnt;
UINT8 *buf, *buf_ptr, *buf_end;
#elif defined LIBMPEG2_BITSTREAM_READER
UINT8 *buffer_ptr;
UINT32 cache;
int bit_count;
#elif defined A32_BITSTREAM_READER
UINT32 *buffer_ptr;
UINT32 cache0;
UINT32 cache1;
int bit_count;
#endif
int size;
} GetBitContext;
static inline int get_bits_count(GetBitContext *s);
/* Element type of the VLC lookup table; each table entry is a pair of these. */
#define VLC_TYPE INT16
typedef struct VLC {
int bits;
INT16 *table_codes;
INT8 *table_bits;
VLC_TYPE (*table)[2]; // code, bits
int table_size, table_allocated;
} VLC;
@ -438,64 +455,234 @@ static inline uint8_t* pbBufPtr(PutBitContext *s)
#endif
}
void init_get_bits(GetBitContext *s,
UINT8 *buffer, int buffer_size);
/* Bitstream reader API docs:
name
arbitrary name which is used as a prefix for the internal variables
gb
getbitcontext
OPEN_READER(name, gb)
loads gb into local variables
CLOSE_READER(name, gb)
stores local vars in gb
UPDATE_CACHE(name, gb)
refills the internal cache from the bitstream
after this call at least MIN_CACHE_BITS will be available,
GET_CACHE(name, gb)
will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
SHOW_UBITS(name, gb, num)
will return the next num bits
SHOW_SBITS(name, gb, num)
will return the next num bits and do sign extension
SKIP_BITS(name, gb, num)
will skip over the next num bits
note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
SKIP_COUNTER(name, gb, num)
will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
LAST_SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
LAST_SKIP_BITS(name, gb, num)
is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
for examples see get_bits, show_bits, skip_bits, get_vlc
*/
#ifdef ALT_BITSTREAM_READER
/* Reader that keeps only a bit index into the buffer. The cache is rebuilt
   from memory by every UPDATE_CACHE, so nothing but the index has to be
   stored back into the context.
   No macro ends in a line continuation, so each definition stops at its own
   last line regardless of what follows it. */

// A 32 bit load shifted left by at most 7 bits leaves at least 25 valid bits.
#   define MIN_CACHE_BITS 25

// Declares the local reader state: current bit index and an empty cache.
#   define OPEN_READER(name, gb)\
        int name##_index= (gb)->index;\
        int name##_cache= 0;

// Stores the local bit index back into the context.
#   define CLOSE_READER(name, gb)\
        (gb)->index= name##_index;

// Loads 32 bits starting at the byte containing the current bit index and
// shifts them so that the next unread bit becomes the MSB of the cache.
#   define UPDATE_CACHE(name, gb)\
        name##_cache= be2me_32( unaligned32( ((uint8_t *)(gb)->buffer)+(name##_index>>3) ) ) << (name##_index&0x07);

// Drops the next num bits from the cache only (the index is not advanced).
#   define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);

// FIXME name?
// Advances the bit index by num bits (the cache is not touched).
#   define SKIP_COUNTER(name, gb, num)\
        name##_index += (num);

// Drops num bits from the cache and advances the bit index.
#   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

// The cache is reloaded from the index by UPDATE_CACHE, so the "last" skip
// variants only need to advance the index.
#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
#   define LAST_SKIP_CACHE(name, gb, num) ;

// Next num bits of the cache as an unsigned value.
#   define SHOW_UBITS(name, gb, num)\
        (((uint32_t)name##_cache)>>INV32(num))

// Next num bits of the cache, shifted as a signed value (sign extension).
#   define SHOW_SBITS(name, gb, num)\
        (((int32_t)name##_cache)>>INV32(num))

// Raw cache contents; the next unread bit is the MSB.
#   define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)
/* Number of bits consumed from the stream so far. This reader tracks its
   position directly as a bit index, so that index is the answer. */
static inline int get_bits_count(GetBitContext *s)
{
    return s->index;
}
#elif defined LIBMPEG2_BITSTREAM_READER
//libmpeg2 like reader
/* Reader that keeps a cache word, a byte pointer to the next unread input
   and a bit counter in the context.
   No macro ends in a line continuation, so each definition stops at its own
   last line regardless of what follows it. */

// Number of cache bits promised after UPDATE_CACHE for this reader.
#   define MIN_CACHE_BITS 16

// Copies bit counter, cache and read pointer from the context into locals.
#   define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        int name##_cache= (gb)->cache;\
        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;

// Writes the local bit counter, cache and read pointer back to the context.
#   define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache= name##_cache;\
        (gb)->buffer_ptr= name##_buffer_ptr;

// When the bit counter is positive, adds the next two input bytes (as one
// big-endian 16 bit value shifted by the counter) to the cache, advances the
// pointer by two bytes and lowers the counter by 16.
#   define UPDATE_CACHE(name, gb)\
    if(name##_bit_count > 0){\
        name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\
        name##_buffer_ptr+=2;\
        name##_bit_count-= 16;\
    }

// Drops the next num bits from the cache only.
#   define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);

// Accounts for num consumed bits in the bit counter only.
#   define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);

// Drops num bits from the cache and accounts for them in the counter.
#   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

// The cache is persistent state here, so the "last" variants do the full work.
#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)

// Next num bits of the cache as an unsigned value.
#   define SHOW_UBITS(name, gb, num)\
        (((uint32_t)name##_cache)>>INV32(num))

// Next num bits of the cache, shifted as a signed value (sign extension).
#   define SHOW_SBITS(name, gb, num)\
        (((int32_t)name##_cache)>>INV32(num))

// Raw cache contents; the next unread bit is the MSB.
#   define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)
/* Number of bits consumed from the stream so far.
   Bytes already fetched past the buffer start are converted to bits; the
   constant 16 cancels the bit counter's start value, and the bit counter
   carries the bits skipped since. */
static inline int get_bits_count(GetBitContext *s)
{
    return s->bit_count - 16 + 8*(s->buffer_ptr - s->buffer);
}
#elif defined A32_BITSTREAM_READER
/* Reader that fetches the input as 32 bit words and keeps two cache words
   (cache0 holds the next bits, cache1 the ones after) plus a bit counter.
   No macro ends in a line continuation, so each definition stops at its own
   last line regardless of what follows it (including the #ifdef below). */

// Number of cache bits promised after UPDATE_CACHE for this reader.
#   define MIN_CACHE_BITS 32

// Copies bit counter, both cache words and the word pointer into locals.
#   define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        uint32_t name##_cache0= (gb)->cache0;\
        uint32_t name##_cache1= (gb)->cache1;\
        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;

// Writes the local reader state back to the context.
#   define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache0= name##_cache0;\
        (gb)->cache1= name##_cache1;\
        (gb)->buffer_ptr= name##_buffer_ptr;

// When the bit counter is positive, loads the next 32 bit word, merges its
// upper part into cache0 and the remainder into cache1, advances the word
// pointer and lowers the counter by 32.
#   define UPDATE_CACHE(name, gb)\
    if(name##_bit_count > 0){\
        const uint32_t next= be2me_32( *name##_buffer_ptr );\
        name##_cache0 |= next>>INV32(name##_bit_count);\
        name##_cache1 |= next<<name##_bit_count;\
        name##_buffer_ptr++;\
        name##_bit_count-= 32;\
    }

// Drops the next num bits from the cache pair, moving bits from cache1 up
// into cache0.
#ifdef ARCH_X86
#   define SKIP_CACHE(name, gb, num)\
        asm(\
            "shldl %2, %1, %0		\n\t"\
            "shll %2, %1		\n\t"\
            : "+r" (name##_cache0), "+r" (name##_cache1)\
            : "Ic" ((uint8_t)(num))\
           );
#else
#   define SKIP_CACHE(name, gb, num)\
        name##_cache0 <<= (num);\
        name##_cache0 |= name##_cache1 >>INV32(num);\
        name##_cache1 <<= (num);
#endif

// Accounts for num consumed bits in the bit counter only.
#   define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);

// Drops num bits from the cache pair and accounts for them in the counter.
#   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

// The cache is persistent state here, so the "last" variants do the full work.
#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)

// Next num bits of cache0 as an unsigned value.
#   define SHOW_UBITS(name, gb, num)\
        (((uint32_t)name##_cache0)>>INV32(num))

// Next num bits of cache0, shifted as a signed value (sign extension).
#   define SHOW_SBITS(name, gb, num)\
        (((int32_t)name##_cache0)>>INV32(num))

// Raw contents of the first cache word; the next unread bit is the MSB.
#   define GET_CACHE(name, gb)\
        (name##_cache0)
/* Number of bits consumed from the stream so far.
   The word pointer is measured in bytes from the buffer start and converted
   to bits; the constant 32 cancels the bit counter's start value, and the
   bit counter carries the bits skipped since. */
static inline int get_bits_count(GetBitContext *s)
{
    return s->bit_count - 32 + 8*((uint8_t*)s->buffer_ptr - s->buffer);
}
#ifndef ALT_BITSTREAM_READER
unsigned int get_bits_long(GetBitContext *s, int n);
unsigned int show_bits_long(GetBitContext *s, int n);
#endif
static inline unsigned int get_bits(GetBitContext *s, int n){
#ifdef ALT_BITSTREAM_READER
#ifdef ALIGNED_BITSTREAM
int index= s->index;
uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
#ifdef ARCH_X86
asm ("shldl %%cl, %2, %0\n\t"
: "=r" (result1)
: "0" (result1), "r" (result2), "c" (index));
#else
result1<<= (index&0x1F);
result2= (result2>>1) >> (31-(index&0x1F));
result1|= result2;
#endif
result1>>= 32 - n;
index+= n;
s->index= index;
return result1;
#else //ALIGNED_BITSTREAM
int index= s->index;
uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+(index>>3) ) );
register int tmp;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
tmp= SHOW_UBITS(re, s, n);
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
return tmp;
}
result<<= (index&0x07);
result>>= 32 - n;
index+= n;
s->index= index;
#ifdef DUMP_STREAM
while(n){
printf("%d", (result>>(n-1))&1);
n--;
}
printf(" ");
#endif
return result;
#endif //!ALIGNED_BITSTREAM
#else //ALT_BITSTREAM_READER
if(s->bit_cnt>=n){
/* most common case here */
unsigned int val = s->bit_buf >> (32 - n);
s->bit_buf <<= n;
s->bit_cnt -= n;
#ifdef STATS
st_bit_counts[st_current_index] += n;
#endif
return val;
}
return get_bits_long(s,n);
#endif //!ALT_BITSTREAM_READER
/* Peek at the next n bits of the stream without consuming them.
   The reader state is loaded into locals and deliberately never written
   back (no CLOSE_READER), so the context itself is not modified. */
static inline unsigned int show_bits(GetBitContext *s, int n)
{
    int peeked;

    OPEN_READER(rd, s)
    UPDATE_CACHE(rd, s)
    peeked = SHOW_UBITS(rd, s, n);
    return peeked;
}
/* Discard the next n bits of the stream.
   Note: gcc seems to reduce this to s->index += n for the ALT reader. */
static inline void skip_bits(GetBitContext *s, int n)
{
    OPEN_READER(rd, s)
    UPDATE_CACHE(rd, s)
    LAST_SKIP_BITS(rd, s, n)
    CLOSE_READER(rd, s)
}
static inline unsigned int get_bits1(GetBitContext *s){
@ -506,158 +693,23 @@ static inline unsigned int get_bits1(GetBitContext *s){
result>>= 8 - 1;
index++;
s->index= index;
#ifdef DUMP_STREAM
printf("%d ", result);
#endif
return result;
#else
if(s->bit_cnt>0){
/* most common case here */
unsigned int val = s->bit_buf >> 31;
s->bit_buf <<= 1;
s->bit_cnt--;
#ifdef STATS
st_bit_counts[st_current_index]++;
#endif
return val;
}
return get_bits_long(s,1);
return get_bits(s, 1);
#endif
}
/* This function is identical to get_bits(), the only */
/* difference is that it doesn't touch the buffer */
/* it is useful to see the buffer. */
static inline unsigned int show_bits(GetBitContext *s, int n)
{
#ifdef ALT_BITSTREAM_READER
#ifdef ALIGNED_BITSTREAM
int index= s->index;
uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
#ifdef ARCH_X86
asm ("shldl %%cl, %2, %0\n\t"
: "=r" (result1)
: "0" (result1), "r" (result2), "c" (index));
#else
result1<<= (index&0x1F);
result2= (result2>>1) >> (31-(index&0x1F));
result1|= result2;
#endif
result1>>= 32 - n;
return result1;
#else //ALIGNED_BITSTREAM
int index= s->index;
uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+(index>>3) ) );
result<<= (index&0x07);
result>>= 32 - n;
return result;
#endif //!ALIGNED_BITSTREAM
#else //ALT_BITSTREAM_READER
if(s->bit_cnt>=n) {
/* most common case here */
unsigned int val = s->bit_buf >> (32 - n);
return val;
}
return show_bits_long(s,n);
#endif //!ALT_BITSTREAM_READER
}
static inline int show_aligned_bits(GetBitContext *s, int offset, int n)
{
#ifdef ALT_BITSTREAM_READER
#ifdef ALIGNED_BITSTREAM
int index= (s->index + offset + 7)&(~7);
uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
#ifdef ARCH_X86
asm ("shldl %%cl, %2, %0\n\t"
: "=r" (result1)
: "0" (result1), "r" (result2), "c" (index));
#else
result1<<= (index&0x1F);
result2= (result2>>1) >> (31-(index&0x1F));
result1|= result2;
#endif
result1>>= 32 - n;
return result1;
#else //ALIGNED_BITSTREAM
int index= (s->index + offset + 7)>>3;
uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+index ) );
result>>= 32 - n;
return result;
#endif //!ALIGNED_BITSTREAM
#else //ALT_BITSTREAM_READER
int index= (get_bits_count(s) + offset + 7)>>3;
uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buf)+index ) );
result>>= 32 - n;
//printf(" %X %X %d \n", (int)(((uint8_t *)s->buf)+index ), (int)s->buf_ptr, s->bit_cnt);
return result;
#endif //!ALT_BITSTREAM_READER
}
static inline void skip_bits(GetBitContext *s, int n){
#ifdef ALT_BITSTREAM_READER
s->index+= n;
#ifdef DUMP_STREAM
{
int result;
s->index-= n;
result= get_bits(s, n);
}
#endif
#else
if(s->bit_cnt>=n){
/* most common case here */
s->bit_buf <<= n;
s->bit_cnt -= n;
#ifdef STATS
st_bit_counts[st_current_index] += n;
#endif
} else {
get_bits_long(s,n);
}
#endif
/* Peek at the next single bit of the stream; shorthand for show_bits(s, 1). */
static inline unsigned int show_bits1(GetBitContext *s)
{
    return show_bits(s, 1);
}
static inline void skip_bits1(GetBitContext *s){
#ifdef ALT_BITSTREAM_READER
s->index++;
#ifdef DUMP_STREAM
s->index--;
printf("%d ", get_bits1(s));
#endif
#else
if(s->bit_cnt>0){
/* most common case here */
s->bit_buf <<= 1;
s->bit_cnt--;
#ifdef STATS
st_bit_counts[st_current_index]++;
#endif
} else {
get_bits_long(s,1);
}
#endif
skip_bits(s, 1);
}
static inline int get_bits_count(GetBitContext *s)
{
#ifdef ALT_BITSTREAM_READER
return s->index;
#else
return (s->buf_ptr - s->buf) * 8 - s->bit_cnt;
#endif
}
void init_get_bits(GetBitContext *s,
UINT8 *buffer, int buffer_size);
int check_marker(GetBitContext *s, char *msg);
void align_get_bits(GetBitContext *s);
@ -666,126 +718,43 @@ int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
const void *codes, int codes_wrap, int codes_size);
void free_vlc(VLC *vlc);
#ifdef ALT_BITSTREAM_READER
#ifdef ALIGNED_BITSTREAM
#ifdef ARCH_X86
#define SHOW_BITS(s, val, n) \
val= be2me_32( ((uint32_t *)(s)->buffer)[bit_cnt>>5] );\
{uint32_t result2= be2me_32( ((uint32_t *)(s)->buffer)[(bit_cnt>>5) + 1] );\
asm ("shldl %%cl, %2, %0\n\t"\
: "=r" (val)\
: "0" (val), "r" (result2), "c" (bit_cnt));\
((uint32_t)val)>>= 32 - n;}
#else //ARCH_X86
#define SHOW_BITS(s, val, n) \
val= be2me_32( ((uint32_t *)(s)->buffer)[bit_cnt>>5] );\
{uint32_t result2= be2me_32( ((uint32_t *)(s)->buffer)[(bit_cnt>>5) + 1] );\
val<<= (bit_cnt&0x1F);\
result2= (result2>>1) >> (31-(bit_cnt&0x1F));\
val|= result2;\
((uint32_t)val)>>= 32 - n;}
#endif //!ARCH_X86
#else //ALIGNED_BITSTREAM
#define SHOW_BITS(s, val, n) \
val= be2me_32( unaligned32( ((uint8_t *)(s)->buffer)+(bit_cnt>>3) ) );\
val<<= (bit_cnt&0x07);\
((uint32_t)val)>>= 32 - n;
#endif // !ALIGNED_BITSTREAM
#define FLUSH_BITS(n) bit_cnt+=n;
#define SAVE_BITS(s) bit_cnt= (s)->index;
#define RESTORE_BITS(s) (s)->index= bit_cnt;
#else
/* macro to go faster */
/* n must be <= 24 */
/* XXX: optimize buffer end test */
#define SHOW_BITS(s, val, n)\
{\
if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
bit_cnt += 8;\
if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
bit_cnt += 8;\
if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
bit_cnt += 8;\
}\
}\
}\
val = bit_buf >> (32 - n);\
}
/* SHOW_BITS with n1 >= n must have been done before */
#define FLUSH_BITS(n)\
{\
bit_buf <<= n;\
bit_cnt -= n;\
}
#define SAVE_BITS(s) \
{\
bit_cnt = (s)->bit_cnt;\
bit_buf = (s)->bit_buf;\
buf_ptr = (s)->buf_ptr;\
}
#define RESTORE_BITS(s) \
{\
(s)->buf_ptr = buf_ptr;\
(s)->bit_buf = bit_buf;\
(s)->bit_cnt = bit_cnt;\
}
#endif // !ALT_BITSTREAM_READER
static inline int get_vlc(GetBitContext *s, VLC *vlc)
{
int code, n, nb_bits, index;
INT16 *table_codes;
INT8 *table_bits;
int bit_cnt;
#ifndef ALT_BITSTREAM_READER
UINT32 bit_buf;
UINT8 *buf_ptr;
#endif
VLC_TYPE (*table)[2];
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
SAVE_BITS(s);
nb_bits = vlc->bits;
table_codes = vlc->table_codes;
table_bits = vlc->table_bits;
table = vlc->table;
#ifdef FAST_GET_FIRST_VLC
SHOW_BITS(s, index, nb_bits);
code = table_codes[index];
n = table_bits[index];
index= SHOW_UBITS(re, s, nb_bits);
code = table[index][0];
n = table[index][1];
if (n > 0) {
/* most common case (90%)*/
FLUSH_BITS(n);
#ifdef DUMP_STREAM
{
int n= bit_cnt - s->index;
skip_bits(s, n);
RESTORE_BITS(s);
}
#endif
RESTORE_BITS(s);
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
return code;
} else if (n == 0) {
return -1;
} else {
FLUSH_BITS(nb_bits);
LAST_SKIP_BITS(re, s, nb_bits)
UPDATE_CACHE(re, s) //this isnt needed but its faster if its here
nb_bits = -n;
table_codes = vlc->table_codes + code;
table_bits = vlc->table_bits + code;
table = vlc->table + code;
}
#endif
for(;;) {
SHOW_BITS(s, index, nb_bits);
code = table_codes[index];
n = table_bits[index];
index= SHOW_UBITS(re, s, nb_bits);
code = table[index][0];
n = table[index][1];
if (n > 0) {
/* most common case */
FLUSH_BITS(n);
SKIP_BITS(re, s, n)
#ifdef STATS
st_bit_counts[st_current_index] += n;
#endif
@ -793,23 +762,16 @@ static inline int get_vlc(GetBitContext *s, VLC *vlc)
} else if (n == 0) {
return -1;
} else {
FLUSH_BITS(nb_bits);
LAST_SKIP_BITS(re, s, nb_bits)
UPDATE_CACHE(re, s)
#ifdef STATS
st_bit_counts[st_current_index] += nb_bits;
#endif
nb_bits = -n;
table_codes = vlc->table_codes + code;
table_bits = vlc->table_bits + code;
table = vlc->table + code;
}
}
#ifdef DUMP_STREAM
{
int n= bit_cnt - s->index;
skip_bits(s, n);
RESTORE_BITS(s);
}
#endif
RESTORE_BITS(s);
CLOSE_READER(re, s)
return code;
}

View File

@ -961,21 +961,20 @@ static int mpeg1_decode_block(MpegEncContext *s,
dprintf("dc=%d diff=%d\n", dc, diff);
i = 1;
} else {
int bit_cnt, v;
UINT32 bit_buf;
UINT8 *buf_ptr;
int v;
OPEN_READER(re, &s->gb);
i = 0;
/* special case for the first coef. no need to add a second vlc table */
SAVE_BITS(&s->gb);
SHOW_BITS(&s->gb, v, 2);
UPDATE_CACHE(re, &s->gb);
v= SHOW_UBITS(re, &s->gb, 2);
if (v & 2) {
run = 0;
level = 1 - ((v & 1) << 1);
FLUSH_BITS(2);
RESTORE_BITS(&s->gb);
SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
goto add_coef;
}
RESTORE_BITS(&s->gb);
CLOSE_READER(re, &s->gb);
}
/* now quantify & encode AC coefs */
@ -1035,26 +1034,25 @@ static int mpeg2_decode_block_non_intra(MpegEncContext *s,
mismatch = 1;
{
int bit_cnt, v;
UINT32 bit_buf;
UINT8 *buf_ptr;
int v;
OPEN_READER(re, &s->gb);
i = 0;
if (n < 4)
if (n < 4)
matrix = s->inter_matrix;
else
matrix = s->chroma_inter_matrix;
/* special case for the first coef. no need to add a second vlc table */
SAVE_BITS(&s->gb);
SHOW_BITS(&s->gb, v, 2);
UPDATE_CACHE(re, &s->gb);
v= SHOW_UBITS(re, &s->gb, 2);
if (v & 2) {
run = 0;
level = 1 - ((v & 1) << 1);
FLUSH_BITS(2);
RESTORE_BITS(&s->gb);
SKIP_BITS(re, &s->gb, 2);
CLOSE_READER(re, &s->gb);
goto add_coef;
}
RESTORE_BITS(&s->gb);
CLOSE_READER(re, &s->gb);
}
/* now quantify & encode AC coefs */

View File

@ -1457,11 +1457,8 @@ static void seek_to_maindata(MPADecodeContext *s, long backstep)
UINT8 *ptr;
/* compute current position in stream */
#ifdef ALT_BITSTREAM_READER
ptr = s->gb.buffer + (s->gb.index>>3);
#else
ptr = s->gb.buf_ptr - (s->gb.bit_cnt >> 3);
#endif
ptr = s->gb.buffer + (get_bits_count(&s->gb)>>3);
/* copy old data before current one */
ptr -= backstep;
memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] +
@ -1547,9 +1544,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
{
int s_index;
int linbits, code, x, y, l, v, i, j, k, pos;
UINT8 *last_buf_ptr;
UINT32 last_bit_buf;
int last_bit_cnt;
GetBitContext last_gb;
VLC *vlc;
UINT8 *code_table;
@ -1608,36 +1603,20 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
/* high frequencies */
vlc = &huff_quad_vlc[g->count1table_select];
last_buf_ptr = NULL;
last_bit_buf = 0;
last_bit_cnt = 0;
last_gb.buffer = NULL;
while (s_index <= 572) {
pos = get_bits_count(&s->gb);
if (pos >= end_pos) {
if (pos > end_pos && last_buf_ptr != NULL) {
if (pos > end_pos && last_gb.buffer != NULL) {
/* some encoders generate an incorrect size for this
part. We must go back into the data */
s_index -= 4;
#ifdef ALT_BITSTREAM_READER
s->gb.buffer = last_buf_ptr;
s->gb.index = last_bit_cnt;
#else
s->gb.buf_ptr = last_buf_ptr;
s->gb.bit_buf = last_bit_buf;
s->gb.bit_cnt = last_bit_cnt;
#endif
s->gb = last_gb;
}
break;
}
#ifdef ALT_BITSTREAM_READER
last_buf_ptr = s->gb.buffer;
last_bit_cnt = s->gb.index;
#else
last_buf_ptr = s->gb.buf_ptr;
last_bit_buf = s->gb.bit_buf;
last_bit_cnt = s->gb.bit_cnt;
#endif
last_gb= s->gb;
code = get_vlc(&s->gb, vlc);
dprintf("t=%d code=%d\n", g->count1table_select, code);
if (code < 0)