swresample/resample: speed up upsampling by precomputing sines

When upsampling, factor is clamped to 1, so the sine needs to be evaluated
only once for each phase: within a phase, consecutive taps differ only in
the sign of the sine. The number of sin() calls therefore should not depend
on the number of filter taps. This patch does the desired precomputation,
yielding significant speedups. Hard guarantees on the gain are not possible,
but the gains themselves are obvious and are illustrated below.

Sample benchmark (x86-64, Haswell, GNU/Linux)
test: fate-swr-resample-dblp-2626-44100
old:
29161085 decicycles in build_filter (loop 1000),     256 runs,      0 skips
28821467 decicycles in build_filter (loop 1000),     512 runs,      0 skips
28668201 decicycles in build_filter (loop 1000),    1000 runs,     24 skips

new:
14351936 decicycles in build_filter (loop 1000),     256 runs,      0 skips
14306652 decicycles in build_filter (loop 1000),     512 runs,      0 skips
14299923 decicycles in build_filter (loop 1000),    1000 runs,     24 skips

Note that this patch does not statically allocate the sin lookup table.
That could be done for the default of 1024 phases, yielding a
512*8 = 4kB array, which should be small enough, and it should yield a
further small improvement. Nevertheless, that change is separate from
this patch, is more debatable due to the increase in binary size, and
requires a LUT to be generated offline.

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
This commit is contained in:
Ganesh Ajjanagadde 2015-11-08 21:39:32 -05:00
parent b02201efb5
commit b87ca4bf25
1 changed files with 17 additions and 4 deletions

View File

@ -144,24 +144,34 @@ static double bessel(double x) {
static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale, static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale,
int filter_type, double kaiser_beta){ int filter_type, double kaiser_beta){
int ph, i; int ph, i;
double x, y, w, t; double x, y, w, t, s;
double *tab = av_malloc_array(tap_count+1, sizeof(*tab)); double *tab = av_malloc_array(tap_count+1, sizeof(*tab));
double *sin_lut = av_malloc_array(phase_count / 2 + 1, sizeof(*sin_lut));
const int center= (tap_count-1)/2; const int center= (tap_count-1)/2;
if (!tab) if (!tab || !sin_lut)
return AVERROR(ENOMEM); goto fail;
/* if upsampling, only need to interpolate, no filter */ /* if upsampling, only need to interpolate, no filter */
if (factor > 1.0) if (factor > 1.0)
factor = 1.0; factor = 1.0;
av_assert0(phase_count == 1 || phase_count % 2 == 0); av_assert0(phase_count == 1 || phase_count % 2 == 0);
if (factor == 1.0) {
for (ph = 0; ph <= phase_count / 2; ph++)
sin_lut[ph] = sin(M_PI * ph / phase_count);
}
for(ph = 0; ph <= phase_count / 2; ph++) { for(ph = 0; ph <= phase_count / 2; ph++) {
double norm = 0; double norm = 0;
s = sin_lut[ph];
for(i=0;i<=tap_count;i++) { for(i=0;i<=tap_count;i++) {
x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor; x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
if (x == 0) y = 1.0; if (x == 0) y = 1.0;
else y = sin(x) / x; else if (factor == 1.0)
y = s / x;
else
y = sin(x) / x;
switch(filter_type){ switch(filter_type){
case SWR_FILTER_TYPE_CUBIC:{ case SWR_FILTER_TYPE_CUBIC:{
const float d= -0.5; //first order derivative = -0.5 const float d= -0.5; //first order derivative = -0.5
@ -183,6 +193,7 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
} }
tab[i] = y; tab[i] = y;
s = -s;
if (i < tap_count) if (i < tap_count)
norm += y; norm += y;
} }
@ -278,7 +289,9 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
} }
#endif #endif
fail:
av_free(tab); av_free(tab);
av_free(sin_lut);
return 0; return 0;
} }