mirror of https://git.ffmpeg.org/ffmpeg.git
Rewrite main resampling loop (common and linear).
This removes a branch at a performance-sensitive point (in the middle of the loop). In fate-swr-resample-s32p-8000-2626, this makes the code about 10% faster. It also simplifies the loops, allowing us to rewrite them in yasm at some later point. The compensation_distance != 0 code and the index < 0 code are still kind of hairy. For compensation_distance != 0, this should likely be handled in the caller, so that it calls swri_resample twice (once up to the dst_incr switch-point, and once with the remainder of the samples). For index < 0, the code should probably be rewritten to break out of the loop once sample_index >= 0, and then resume (e.g. as a tail-call) in the common or linear resampling loops. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
e91f27cbbb
commit
9b53853756
|
@ -134,17 +134,19 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
|
|||
av_assert2(index >= 0);
|
||||
*consumed= index;
|
||||
index = 0;
|
||||
}else if(compensation_distance == 0 && !c->linear && index >= 0){
|
||||
int sample_index = 0;
|
||||
for(dst_index=0; dst_index < dst_size; dst_index++){
|
||||
FELEM *filter;
|
||||
sample_index += index >> c->phase_shift;
|
||||
index &= c->phase_mask;
|
||||
filter= ((FELEM*)c->filter_bank) + c->filter_alloc*index;
|
||||
} else if (compensation_distance == 0 && index >= 0) {
|
||||
int64_t end_index = (1 + src_size - c->filter_length) << c->phase_shift;
|
||||
int64_t delta_frac = (end_index - index) * c->src_incr - c->frac;
|
||||
int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr;
|
||||
int n = FFMIN(dst_size, delta_n);
|
||||
int sample_index;
|
||||
|
||||
if (!c->linear) {
|
||||
sample_index = index >> c->phase_shift;
|
||||
index &= c->phase_mask;
|
||||
for (dst_index = 0; dst_index < n; dst_index++) {
|
||||
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
||||
|
||||
if(sample_index + c->filter_length > src_size){
|
||||
break;
|
||||
}else{
|
||||
#ifdef COMMON_CORE
|
||||
COMMON_CORE
|
||||
#else
|
||||
|
@ -154,7 +156,6 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
|
|||
}
|
||||
OUT(dst[dst_index], val);
|
||||
#endif
|
||||
}
|
||||
|
||||
frac += dst_incr_frac;
|
||||
index += dst_incr;
|
||||
|
@ -162,7 +163,38 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
|
|||
frac -= c->src_incr;
|
||||
index++;
|
||||
}
|
||||
sample_index += index >> c->phase_shift;
|
||||
index &= c->phase_mask;
|
||||
}
|
||||
} else {
|
||||
sample_index = index >> c->phase_shift;
|
||||
index &= c->phase_mask;
|
||||
for (dst_index = 0; dst_index < n; dst_index++) {
|
||||
FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
|
||||
FELEM2 val=0, v2 = 0;
|
||||
|
||||
#ifdef LINEAR_CORE
|
||||
LINEAR_CORE
|
||||
#else
|
||||
for (i = 0; i < c->filter_length; i++) {
|
||||
val += src[sample_index + i] * (FELEM2)filter[i];
|
||||
v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
|
||||
}
|
||||
#endif
|
||||
val += (v2 - val) * (FELEML) frac / c->src_incr;
|
||||
OUT(dst[dst_index], val);
|
||||
|
||||
frac += dst_incr_frac;
|
||||
index += dst_incr;
|
||||
if (frac >= c->src_incr) {
|
||||
frac -= c->src_incr;
|
||||
index++;
|
||||
}
|
||||
sample_index += index >> c->phase_shift;
|
||||
index &= c->phase_mask;
|
||||
}
|
||||
}
|
||||
|
||||
*consumed = sample_index;
|
||||
} else {
|
||||
int sample_index = 0;
|
||||
|
|
Loading…
Reference in New Issue