mirror of https://git.ffmpeg.org/ffmpeg.git
Optimize compute_antialias() and add a floating-point-based alternative (2x faster)
Originally committed as revision 2679 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
ec7d0d2e9e
commit
a1e257b231
|
@ -960,6 +960,8 @@ typedef struct AVCodecContext {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* qscale factor between p and i frames.
|
* qscale factor between p and i frames.
|
||||||
|
* if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset)
|
||||||
|
* if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset)
|
||||||
* - encoding: set by user.
|
* - encoding: set by user.
|
||||||
* - decoding: unused
|
* - decoding: unused
|
||||||
*/
|
*/
|
||||||
|
@ -967,8 +969,6 @@ typedef struct AVCodecContext {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* qscale offset between p and i frames.
|
* qscale offset between p and i frames.
|
||||||
* if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset)
|
|
||||||
* if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset)
|
|
||||||
* - encoding: set by user.
|
* - encoding: set by user.
|
||||||
* - decoding: unused
|
* - decoding: unused
|
||||||
*/
|
*/
|
||||||
|
@ -1490,6 +1490,17 @@ typedef struct AVCodecContext {
|
||||||
* - decoding: unused.
|
* - decoding: unused.
|
||||||
*/
|
*/
|
||||||
int error_rate;
|
int error_rate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MP3 antialias algorithm, see FF_AA_* below.
|
||||||
|
* - encoding: unused
|
||||||
|
* - decoding: set by user
|
||||||
|
*/
|
||||||
|
int antialias_algo;
|
||||||
|
#define FF_AA_AUTO 0
|
||||||
|
#define FF_AA_FASTINT 1 //not implemented yet
|
||||||
|
#define FF_AA_INT 2
|
||||||
|
#define FF_AA_FLOAT 3
|
||||||
} AVCodecContext;
|
} AVCodecContext;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -66,6 +66,8 @@ typedef int32_t MPA_INT;
|
||||||
#define HEADER_SIZE 4
|
#define HEADER_SIZE 4
|
||||||
#define BACKSTEP_SIZE 512
|
#define BACKSTEP_SIZE 512
|
||||||
|
|
||||||
|
struct GranuleDef;
|
||||||
|
|
||||||
typedef struct MPADecodeContext {
|
typedef struct MPADecodeContext {
|
||||||
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE]; /* input buffer */
|
||||||
int inbuf_index;
|
int inbuf_index;
|
||||||
|
@ -93,6 +95,7 @@ typedef struct MPADecodeContext {
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
int frame_count;
|
int frame_count;
|
||||||
#endif
|
#endif
|
||||||
|
void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g);
|
||||||
} MPADecodeContext;
|
} MPADecodeContext;
|
||||||
|
|
||||||
/* layer 3 "granule" */
|
/* layer 3 "granule" */
|
||||||
|
@ -127,6 +130,9 @@ typedef struct HuffTable {
|
||||||
|
|
||||||
#include "mpegaudiodectab.h"
|
#include "mpegaudiodectab.h"
|
||||||
|
|
||||||
|
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
|
||||||
|
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
|
||||||
|
|
||||||
/* vlc structure for decoding layer 3 huffman tables */
|
/* vlc structure for decoding layer 3 huffman tables */
|
||||||
static VLC huff_vlc[16];
|
static VLC huff_vlc[16];
|
||||||
static uint8_t *huff_code_table[16];
|
static uint8_t *huff_code_table[16];
|
||||||
|
@ -144,7 +150,8 @@ static uint32_t *table_4_3_value;
|
||||||
/* intensity stereo coef table */
|
/* intensity stereo coef table */
|
||||||
static int32_t is_table[2][16];
|
static int32_t is_table[2][16];
|
||||||
static int32_t is_table_lsf[2][2][16];
|
static int32_t is_table_lsf[2][2][16];
|
||||||
static int32_t csa_table[8][2];
|
static int32_t csa_table[8][4];
|
||||||
|
static float csa_table_float[8][4];
|
||||||
static int32_t mdct_win[8][36];
|
static int32_t mdct_win[8][36];
|
||||||
|
|
||||||
/* lower 2 bits: modulo 3, higher bits: shift */
|
/* lower 2 bits: modulo 3, higher bits: shift */
|
||||||
|
@ -455,6 +462,10 @@ static int decode_init(AVCodecContext * avctx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(avctx->antialias_algo == FF_AA_INT)
|
||||||
|
s->compute_antialias= compute_antialias_integer;
|
||||||
|
else
|
||||||
|
s->compute_antialias= compute_antialias_float;
|
||||||
for(i=0;i<8;i++) {
|
for(i=0;i<8;i++) {
|
||||||
float ci, cs, ca;
|
float ci, cs, ca;
|
||||||
ci = ci_table[i];
|
ci = ci_table[i];
|
||||||
|
@ -462,6 +473,13 @@ static int decode_init(AVCodecContext * avctx)
|
||||||
ca = cs * ci;
|
ca = cs * ci;
|
||||||
csa_table[i][0] = FIX(cs);
|
csa_table[i][0] = FIX(cs);
|
||||||
csa_table[i][1] = FIX(ca);
|
csa_table[i][1] = FIX(ca);
|
||||||
|
csa_table[i][2] = FIX(ca) + FIX(cs);
|
||||||
|
csa_table[i][3] = FIX(ca) - FIX(cs);
|
||||||
|
csa_table_float[i][0] = cs;
|
||||||
|
csa_table_float[i][1] = ca;
|
||||||
|
csa_table_float[i][2] = ca + cs;
|
||||||
|
csa_table_float[i][3] = ca - cs;
|
||||||
|
// printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* compute mdct windows */
|
/* compute mdct windows */
|
||||||
|
@ -1892,11 +1910,11 @@ static void compute_stereo(MPADecodeContext *s,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void compute_antialias(MPADecodeContext *s,
|
static void compute_antialias_integer(MPADecodeContext *s,
|
||||||
GranuleDef *g)
|
GranuleDef *g)
|
||||||
{
|
{
|
||||||
int32_t *ptr, *p0, *p1, *csa;
|
int32_t *ptr, *p0, *p1, *csa;
|
||||||
int n, tmp0, tmp1, i, j;
|
int n, i, j;
|
||||||
|
|
||||||
/* we antialias only "long" bands */
|
/* we antialias only "long" bands */
|
||||||
if (g->block_type == 2) {
|
if (g->block_type == 2) {
|
||||||
|
@ -1913,14 +1931,82 @@ static void compute_antialias(MPADecodeContext *s,
|
||||||
p0 = ptr - 1;
|
p0 = ptr - 1;
|
||||||
p1 = ptr;
|
p1 = ptr;
|
||||||
csa = &csa_table[0][0];
|
csa = &csa_table[0][0];
|
||||||
for(j=0;j<8;j++) {
|
for(j=0;j<4;j++) {
|
||||||
tmp0 = *p0;
|
int tmp0 = *p0;
|
||||||
tmp1 = *p1;
|
int tmp1 = *p1;
|
||||||
|
#if 0
|
||||||
*p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
|
*p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
|
||||||
*p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
|
*p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
|
||||||
p0--;
|
#else
|
||||||
p1++;
|
int64_t tmp2= MUL64(tmp0 + tmp1, csa[0]);
|
||||||
csa += 2;
|
*p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2]));
|
||||||
|
*p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3]));
|
||||||
|
#endif
|
||||||
|
p0--; p1++;
|
||||||
|
csa += 4;
|
||||||
|
tmp0 = *p0;
|
||||||
|
tmp1 = *p1;
|
||||||
|
#if 0
|
||||||
|
*p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1]));
|
||||||
|
*p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0]));
|
||||||
|
#else
|
||||||
|
tmp2= MUL64(tmp0 + tmp1, csa[0]);
|
||||||
|
*p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2]));
|
||||||
|
*p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3]));
|
||||||
|
#endif
|
||||||
|
p0--; p1++;
|
||||||
|
csa += 4;
|
||||||
|
}
|
||||||
|
ptr += 18;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void compute_antialias_float(MPADecodeContext *s,
|
||||||
|
GranuleDef *g)
|
||||||
|
{
|
||||||
|
int32_t *ptr, *p0, *p1;
|
||||||
|
int n, i, j;
|
||||||
|
|
||||||
|
/* we antialias only "long" bands */
|
||||||
|
if (g->block_type == 2) {
|
||||||
|
if (!g->switch_point)
|
||||||
|
return;
|
||||||
|
/* XXX: check this for 8000Hz case */
|
||||||
|
n = 1;
|
||||||
|
} else {
|
||||||
|
n = SBLIMIT - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr = g->sb_hybrid + 18;
|
||||||
|
for(i = n;i > 0;i--) {
|
||||||
|
float *csa = &csa_table_float[0][0];
|
||||||
|
p0 = ptr - 1;
|
||||||
|
p1 = ptr;
|
||||||
|
for(j=0;j<4;j++) {
|
||||||
|
float tmp0 = *p0;
|
||||||
|
float tmp1 = *p1;
|
||||||
|
#if 1
|
||||||
|
*p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]);
|
||||||
|
*p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]);
|
||||||
|
#else
|
||||||
|
float tmp2= (tmp0 + tmp1) * csa[0];
|
||||||
|
*p0 = lrintf(tmp2 - tmp1 * csa[2]);
|
||||||
|
*p1 = lrintf(tmp2 + tmp0 * csa[3]);
|
||||||
|
#endif
|
||||||
|
p0--; p1++;
|
||||||
|
csa += 4;
|
||||||
|
tmp0 = *p0;
|
||||||
|
tmp1 = *p1;
|
||||||
|
#if 1
|
||||||
|
*p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]);
|
||||||
|
*p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]);
|
||||||
|
#else
|
||||||
|
tmp2= (tmp0 + tmp1) * csa[0];
|
||||||
|
*p0 = lrintf(tmp2 - tmp1 * csa[2]);
|
||||||
|
*p1 = lrintf(tmp2 + tmp0 * csa[3]);
|
||||||
|
#endif
|
||||||
|
p0--; p1++;
|
||||||
|
csa += 4;
|
||||||
}
|
}
|
||||||
ptr += 18;
|
ptr += 18;
|
||||||
}
|
}
|
||||||
|
@ -2352,7 +2438,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
sample_dump(0, g->sb_hybrid, 576);
|
sample_dump(0, g->sb_hybrid, 576);
|
||||||
#endif
|
#endif
|
||||||
compute_antialias(s, g);
|
s->compute_antialias(s, g);
|
||||||
#if defined(DEBUG)
|
#if defined(DEBUG)
|
||||||
sample_dump(1, g->sb_hybrid, 576);
|
sample_dump(1, g->sb_hybrid, 576);
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue