mirror of https://git.ffmpeg.org/ffmpeg.git
SBR DSP: unroll sum_square
The length is even, so some unrolling can be performed.
Timings are for x86:
- 32 bits: 102c -> 82c
- 64 bits: 82c -> 69c

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
parent
294c05ce8a
commit
dabf8dd34a
|
@@ -35,13 +35,18 @@ static void sbr_sum64x5_c(float *z)
|
||||||
|
|
||||||
static float sbr_sum_square_c(float (*x)[2], int n)
|
static float sbr_sum_square_c(float (*x)[2], int n)
|
||||||
{
|
{
|
||||||
float sum = 0.0f;
|
float sum0 = 0.0f, sum1 = 0.0f;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i += 2)
|
||||||
sum += x[i][0] * x[i][0] + x[i][1] * x[i][1];
|
{
|
||||||
|
sum0 += x[i + 0][0] * x[i + 0][0];
|
||||||
|
sum1 += x[i + 0][1] * x[i + 0][1];
|
||||||
|
sum0 += x[i + 1][0] * x[i + 1][0];
|
||||||
|
sum1 += x[i + 1][1] * x[i + 1][1];
|
||||||
|
}
|
||||||
|
|
||||||
return sum;
|
return sum0 + sum1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sbr_neg_odd_64_c(float *x)
|
static void sbr_neg_odd_64_c(float *x)
|
||||||
|
|
Loading…
Reference in New Issue