lavfi/dctdnoiz: move DC normalization out of loops.

Make code slightly faster, simpler, clearer. The filter is still slow as hell, and this change won't cause any visible performance improvement (it still takes more than one minute to process a single 1080p frame on a Core 2 here).
2013-05-26 22:32:24 +02:00 · 2013-05-26 22:32:24 +02:00 · b439ece51c
parent bd89b2b22a
commit b439ece51c
1 changed files with 13 additions and 11 deletions
--- a/libavfilter/vf_dctdnoiz.c
+++ b/libavfilter/vf_dctdnoiz.c
@@ -82,9 +82,10 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize
        av_dct_calc(ctx->dct, line);
        column = ctx->tmp_block + y;
-        for (x = 0; x < BSIZE; x++) {
+        column[0] = line[0] * (1. / sqrt(BSIZE));
-            *line *= x == 0 ? 1. / sqrt(BSIZE) : sqrt(2. / BSIZE);
+        column += BSIZE;
-            *column = *line++;
+        for (x = 1; x < BSIZE; x++) {
+            *column = line[x] * sqrt(2. / BSIZE);
            column += BSIZE;
        }
    }
@@ -92,8 +93,9 @@ static float *dct_block(DCTdnoizContext *ctx, const float *src, int src_linesize
    column = ctx->tmp_block;
    for (x = 0; x < BSIZE; x++) {
        av_dct_calc(ctx->dct, column);
-        for (y = 0; y < BSIZE; y++)
+        column[0] *= 1. / sqrt(BSIZE);
-            column[y] *= y == 0 ? 1. / sqrt(BSIZE) : sqrt(2. / BSIZE);
+        for (y = 1; y < BSIZE; y++)
+            column[y] *= sqrt(2. / BSIZE);
        column += BSIZE;
    }
@@ -111,18 +113,18 @@ static void idct_block(DCTdnoizContext *ctx, float *dst, int dst_linesize)
    float *tmp = ctx->tmp_block;
    for (y = 0; y < BSIZE; y++) {
-        for (x = 0; x < BSIZE; x++)
+        block[0] *= sqrt(BSIZE);
-            block[x] *= x == 0 ? sqrt(BSIZE) : 1./sqrt(2. / BSIZE);
+        for (x = 1; x < BSIZE; x++)
+            block[x] *= 1./sqrt(2. / BSIZE);
        av_dct_calc(ctx->idct, block);
        block += BSIZE;
    }
    block = ctx->block;
    for (y = 0; y < BSIZE; y++) {
-        for (x = 0; x < BSIZE; x++) {
+        tmp[0] = block[y] * sqrt(BSIZE);
-            tmp[x] = block[x*BSIZE + y];
+        for (x = 1; x < BSIZE; x++)
-            tmp[x] *= x == 0 ? sqrt(BSIZE) : 1./sqrt(2. / BSIZE);
+            tmp[x] = block[x*BSIZE + y] * (1./sqrt(2. / BSIZE));
-        }
        av_dct_calc(ctx->idct, tmp);
        for (x = 0; x < BSIZE; x++)
            dst[x*dst_linesize + y] += tmp[x];