ffmpeg/tests/checkasm/hevc_deblock.c

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>

#include "libavutil/intreadwrite.h"
#include "libavutil/macros.h"
#include "libavutil/mem_internal.h"

#include "libavcodec/hevcdsp.h"

#include "checkasm.h"

// Mask applied to every random 32-bit word so that each pixel packed in it
// stays within range. Indexed by (bit_depth - 8) >> 1: 0 = 8-bit,
// 1 = 10-bit, 2 = 12-bit.
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };

// Bytes per pixel; expects an int `bit_depth` in the enclosing scope.
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
// Line stride in bytes and number of lines used to size the test buffers.
#define BUF_STRIDE (16 * 2)
#define BUF_LINES (16)
// large buffer sizes based on high bit depth
// BUF_OFFSET bytes of padding precede the filtered area, so the filters can
// reach backwards from the edge without leaving the buffer.
#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)

// Fill the first `size` bytes of buf0 and buf1 with the same random, range
// masked pixel data, four bytes at a time. Expects `bit_depth` in scope.
// Every argument is parenthesized so that expressions such as `len & ~3`
// or `base + off` expand with the intended precedence.
#define randomize_buffers(buf0, buf1, size)                 \
    do {                                                    \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
        int k;                                              \
        for (k = 0; k < (size); k += 4) {                   \
            uint32_t r = rnd() & mask;                      \
            AV_WN32A((buf0) + k, r);                        \
            AV_WN32A((buf1) + k, r);                        \
        }                                                   \
    } while (0)

// Run the horizontal and vertical chroma deblocking functions of `h` against
// their reference on identical random pixel data and fail on any difference
// anywhere in the buffer.
//   h         - DSP context providing the function pointers under test
//   bit_depth - pixel bit depth; selects the random data mask and test name
//   c         - non-zero selects the *_c entry points (reported as "_full")
static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
{
    // see tctable[] in hevc_filter.c, we check full range
    int32_t tc[2] = { rnd() % 25, rnd() % 25 };
    // no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c
    // variant) functions, see deblocking_filter_CTB() in hevc_filter.c;
    // masking with c therefore yields 0 when c == 0 and a random bit otherwise
    uint8_t no_p[2] = { rnd() & c, rnd() & c };
    uint8_t no_q[2] = { rnd() & c, rnd() & c };
    const char *suffix = c ? "_full" : "";
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    // the edge sits BUF_OFFSET bytes into each buffer
    uint8_t *ref_pix = buf0 + BUF_OFFSET;
    uint8_t *new_pix = buf1 + BUF_OFFSET;

    declare_func(void, uint8_t *pix, ptrdiff_t stride,
                 const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);

    // horizontal edge
    if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
                   "hevc_h_loop_filter_chroma%d%s", bit_depth, suffix)) {
        randomize_buffers(buf0, buf1, BUF_SIZE);

        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
        // compare the whole buffer so stray writes are caught as well
        if (memcmp(buf0, buf1, BUF_SIZE) != 0)
            fail();
        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
    }

    // vertical edge
    if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
                   "hevc_v_loop_filter_chroma%d%s", bit_depth, suffix)) {
        randomize_buffers(buf0, buf1, BUF_SIZE);

        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
        if (memcmp(buf0, buf1, BUF_SIZE) != 0)
            fail();
        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
    }
}

// Taps on either side of the edge located at `buf`: P3..P0 lie before it,
// Q0..Q3 start at it. These expect a byte pointer `buf` and a byte distance
// `xstride` between neighbouring taps in the enclosing scope.
#define P3 buf[-4 * xstride]
#define P2 buf[-3 * xstride]
#define P1 buf[-2 * xstride]
#define P0 buf[-1 * xstride]
#define Q0 buf[0 * xstride]
#define Q1 buf[1 * xstride]
#define Q2 buf[2 * xstride]
#define Q3 buf[3 * xstride]

// tc[x] * 2.5, rounded; expects `tc` in scope.
#define TC25(x) ((tc[(x)] * 5 + 1) >> 1)
// Truncate a value to `bit_depth` bits. The argument is parenthesized so that
// compound expressions (a | b, c ? d : e, ...) are masked as a whole.
#define MASK(x) ((uint16_t)((x) & ((1 << (bit_depth)) - 1)))
// Read the pixel stored at tap x as an 8 or 16 bit value, per SIZEOF_PIXEL.
#define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t *)&(x) : *(uint16_t *)&(x))
// Store y, truncated to bit_depth bits, into the pixel at tap x.
#define SET(x, y) do { \
    uint16_t z = MASK(y); \
    if (SIZEOF_PIXEL == 1) \
        *(uint8_t *)&(x) = z; \
    else \
        *(uint16_t *)&(x) = z; \
} while (0)
// Pixel x lowered by diff (clipped to the valid range) plus a random amount
// below max(2 * diff, 1). Parenthesized as a whole so it can be combined
// safely with surrounding operators.
#define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
    (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))

// NOTE: this function doesn't work 'correctly' in that it won't always choose
// strong/strong or weak/weak, in most cases it tends to but will sometimes mix
// weak/strong or even skip sometimes. This is more useful to test correctness
// for these functions, though it does make benching them difficult. The easiest
// way to bench these functions is to check an overall decode since there are too
// many paths and ways to trigger the deblock: we would have to bench all
// permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
// too much.
static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
   uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
{
    int i, j, b3, tc25, tc25diff, b3diff;
    // both tc & beta are unscaled inputs
    // minimum useful value is 1, full range 0-24
    tc[0] = (rnd() % 25) + 1;
    tc[1] = (rnd() % 25) + 1;
    // minimum useful value for 8bit is 8
    *beta = (rnd() % 57) + 8;

    switch (type) {
    case 0: // strong
        for (j = 0; j < 2; j++) {
            tc25 = TC25(j) << (bit_depth - 8);
            tc25diff = FFMAX(tc25 - 1, 0);
            // 4 lines per tc
            for (i = 0; i < 4; i++) {
                b3 = (*beta << (bit_depth - 8)) >> 3;

                SET(P0, rnd() % (1 << bit_depth));
                SET(Q0, RANDCLIP(P0, tc25diff));

                // p3 - p0 up to beta3 budget
                b3diff = rnd() % b3;
                SET(P3, RANDCLIP(P0, b3diff));
                // q3 - q0, reduced budget
                b3diff = rnd() % FFMAX(b3 - b3diff, 1);
                SET(Q3, RANDCLIP(Q0, b3diff));

                // same concept, budget across 4 pixels
                b3 -= b3diff = rnd() % FFMAX(b3, 1);
                SET(P2, RANDCLIP(P0, b3diff));
                b3 -= b3diff = rnd() % FFMAX(b3, 1);
                SET(Q2, RANDCLIP(Q0, b3diff));

                // extra reduced budget for weighted pixels
                b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
                SET(P1, RANDCLIP(P0, b3diff));
                b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
                SET(Q1, RANDCLIP(Q0, b3diff));

                buf += ystride;
            }
        }
        break;
    case 1: // weak
        for (j = 0; j < 2; j++) {
            tc25 = TC25(j) << (bit_depth - 8);
            tc25diff = FFMAX(tc25 - 1, 0);
            // 4 lines per tc
            for (i = 0; i < 4; i++) {
                // Weak filtering is signficantly simpler to activate as
                // we only need to satisfy d0 + d3 < beta, which
                // can be simplified to d0 + d0 < beta. Using the above
                // derivations but substiuting b3 for b1 and ensuring
                // that P0/Q0 are at least 1/2 tc25diff apart (tending
                // towards 1/2 range).
                b3 = (*beta << (bit_depth - 8)) >> 1;

                SET(P0, rnd() % (1 << bit_depth));
                SET(Q0, RANDCLIP(P0, tc25diff >> 1) +
                    (tc25diff >> 1) * (P0 < (1 << (bit_depth - 1))) ? 1 : -1);

                // p3 - p0 up to beta3 budget
                b3diff = rnd() % b3;
                SET(P3, RANDCLIP(P0, b3diff));
                // q3 - q0, reduced budget
                b3diff = rnd() % FFMAX(b3 - b3diff, 1);
                SET(Q3, RANDCLIP(Q0, b3diff));

                // same concept, budget across 4 pixels
                b3 -= b3diff = rnd() % FFMAX(b3, 1);
                SET(P2, RANDCLIP(P0, b3diff));
                b3 -= b3diff = rnd() % FFMAX(b3, 1);
                SET(Q2, RANDCLIP(Q0, b3diff));

                // extra reduced budget for weighted pixels
                b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
                SET(P1, RANDCLIP(P0, b3diff));
                b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
                SET(Q1, RANDCLIP(Q0, b3diff));

                buf += ystride;
            }
        }
        break;
    case 2: // none
        *beta = 0; // ensure skip
        for (i = 0; i < 8; i++) {
            // we can just fill with completely random data, nothing should be touched.
            SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
            SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
            buf += ystride;
        }
        break;
    }
}

// Run the horizontal and vertical luma deblocking functions of `h` against
// their reference for each generated input kind (strong, weak, skip) and fail
// on any difference anywhere in the buffer.
//   h         - DSP context providing the function pointers under test
//   bit_depth - pixel bit depth; selects pixel size and test name
//   c         - non-zero selects the *_c entry points (reported as "_full")
//               and randomizes no_p/no_q; zero keeps both flags at 0
static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
{
    static const char *const kinds[3] = { "strong", "weak", "skip" };
    const char *suffix = c ? "_full" : "";
    // bytes between lines and between neighbouring pixels of the test area
    const ptrdiff_t line_size  = 16 * SIZEOF_PIXEL;
    const ptrdiff_t pixel_size = SIZEOF_PIXEL;
    int beta;
    int32_t tc[2] = {0};
    uint8_t no_p[2] = { rnd() & c, rnd() & c };
    uint8_t no_q[2] = { rnd() & c, rnd() & c };
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    uint8_t *ptr0 = buf0 + BUF_OFFSET;
    uint8_t *ptr1 = buf1 + BUF_OFFSET;

    declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,
                 const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
    // only the taps are randomized below; give the rest a defined value
    memset(buf0, 0, BUF_SIZE);

    for (int kind = 0; kind < 3; kind++) {
        const char *type = kinds[kind];

        // horizontal edge: taps are one line apart, lines one pixel apart
        if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
                       "hevc_h_loop_filter_luma%d_%s%s", bit_depth, type, suffix)) {
            randomize_luma_buffers(kind, &beta, tc, ptr0, line_size, pixel_size, bit_depth);
            memcpy(buf1, buf0, BUF_SIZE);

            call_ref(ptr0, line_size, beta, tc, no_p, no_q);
            call_new(ptr1, line_size, beta, tc, no_p, no_q);
            if (memcmp(buf0, buf1, BUF_SIZE) != 0)
                fail();
            bench_new(ptr1, line_size, beta, tc, no_p, no_q);
        }

        // vertical edge: taps are one pixel apart, lines one line apart
        if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
                       "hevc_v_loop_filter_luma%d_%s%s", bit_depth, type, suffix)) {
            randomize_luma_buffers(kind, &beta, tc, ptr0, pixel_size, line_size, bit_depth);
            memcpy(buf1, buf0, BUF_SIZE);

            call_ref(ptr0, line_size, beta, tc, no_p, no_q);
            call_new(ptr1, line_size, beta, tc, no_p, no_q);
            if (memcmp(buf0, buf1, BUF_SIZE) != 0)
                fail();
            bench_new(ptr1, line_size, beta, tc, no_p, no_q);
        }
    }
}

// Test entry point: checks the chroma and then the luma deblocking functions
// at 8, 10 and 12 bits, first the plain variants and then the "_full" ones,
// reporting each of the four groups separately.
void checkasm_check_hevc_deblock(void)
{
    HEVCDSPContext h;

    for (int full = 0; full < 2; full++) {
        for (int depth = 8; depth <= 12; depth += 2) {
            ff_hevc_dsp_init(&h, depth);
            check_deblock_chroma(&h, depth, full);
        }
        if (full)
            report("chroma_full");
        else
            report("chroma");
    }

    for (int full = 0; full < 2; full++) {
        for (int depth = 8; depth <= 12; depth += 2) {
            ff_hevc_dsp_init(&h, depth);
            check_deblock_luma(&h, depth, full);
        }
        if (full)
            report("luma_full");
        else
            report("luma");
    }
}
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`/*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License along`
			`* with FFmpeg; if not, write to the Free Software Foundation, Inc.,`
			`* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.`
			`*/`

			`#include <string.h>`

			`#include "libavutil/intreadwrite.h"`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`#include "libavutil/macros.h"`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`#include "libavutil/mem_internal.h"`

			`#include "libavcodec/hevcdsp.h"`

			`#include "checkasm.h"`

			`static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };`

			`#define SIZEOF_PIXEL ((bit_depth + 7) / 8)`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`#define BUF_STRIDE (16 * 2)`
			`#define BUF_LINES (16)`
			`// large buffer sizes based on high bit depth`
			`#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)`
			`#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
			`#define randomize_buffers(buf0, buf1, size) \`
			`do { \`
			`uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \`
			`int k; \`
			`for (k = 0; k < size; k += 4) { \`
			`uint32_t r = rnd() & mask; \`
			`AV_WN32A(buf0 + k, r); \`
			`AV_WN32A(buf1 + k, r); \`
			`} \`
			`} while (0)`

checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`{`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`// see tctable[] in hevc_filter.c, we check full range`
			`int32_t tc[2] = { rnd() % 25, rnd() % 25 };`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`// no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c`
			`// variant) functions, see deblocking_filter_CTB() in hevc_filter.c`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`uint8_t no_p[2] = { rnd() & c, rnd() & c };`
			`uint8_t no_q[2] = { rnd() & c, rnd() & c };`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);`
			`LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);`

tests/checkasm/hevc_*: Fix funtion pointer types Forgotten in b3bbbb14d0685c8c1fbcf8455e59c7f444290c7c. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-05-12 23:22:25 +00:00			`declare_func(void, uint8_t *pix, ptrdiff_t stride,`
			`const int32_t tc, const uint8_t no_p, const uint8_t *no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,`
			`"hevc_h_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))`
			`{`
			`randomize_buffers(buf0, buf1, BUF_SIZE);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
			`bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`}`

			`if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,`
			`"hevc_v_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))`
			`{`
			`randomize_buffers(buf0, buf1, BUF_SIZE);`

			`call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
			`bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`}`
			`}`

			`#define P3 buf[-4 * xstride]`
			`#define P2 buf[-3 * xstride]`
			`#define P1 buf[-2 * xstride]`
			`#define P0 buf[-1 * xstride]`
			`#define Q0 buf[0 * xstride]`
			`#define Q1 buf[1 * xstride]`
			`#define Q2 buf[2 * xstride]`
			`#define Q3 buf[3 * xstride]`

			`#define TC25(x) ((tc[x] * 5 + 1) >> 1)`
			`#define MASK(x) (uint16_t)(x & ((1 << (bit_depth)) - 1))`
			`#define GET(x) ((SIZEOF_PIXEL == 1) ? (uint8_t)(&x) : (uint16_t)(&x))`
			`#define SET(x, y) do { \`
			`uint16_t z = MASK(y); \`
			`if (SIZEOF_PIXEL == 1) \`
			`(uint8_t)(&x) = z; \`
			`else \`
			`(uint16_t)(&x) = z; \`
			`} while (0)`
			`#define RANDCLIP(x, diff) av_clip(GET(x) - (diff), 0, \`
			`(1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1)`

			`// NOTE: this function doesn't work 'correctly' in that it won't always choose`
			`// strong/strong or weak/weak, in most cases it tends to but will sometimes mix`
			`// weak/strong or even skip sometimes. This is more useful to test correctness`
			`// for these functions, though it does make benching them difficult. The easiest`
			`// way to bench these functions is to check an overall decode since there are too`
			`// many paths and ways to trigger the deblock: we would have to bench all`
			`// permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes`
			`// too much.`
			`static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],`
			`uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)`
			`{`
			`int i, j, b3, tc25, tc25diff, b3diff;`
			`// both tc & beta are unscaled inputs`
			`// minimum useful value is 1, full range 0-24`
			`tc[0] = (rnd() % 25) + 1;`
			`tc[1] = (rnd() % 25) + 1;`
			`// minimum useful value for 8bit is 8`
			`*beta = (rnd() % 57) + 8;`

			`switch (type) {`
			`case 0: // strong`
			`for (j = 0; j < 2; j++) {`
			`tc25 = TC25(j) << (bit_depth - 8);`
			`tc25diff = FFMAX(tc25 - 1, 0);`
			`// 4 lines per tc`
			`for (i = 0; i < 4; i++) {`
			`b3 = (*beta << (bit_depth - 8)) >> 3;`

			`SET(P0, rnd() % (1 << bit_depth));`
			`SET(Q0, RANDCLIP(P0, tc25diff));`

			`// p3 - p0 up to beta3 budget`
			`b3diff = rnd() % b3;`
			`SET(P3, RANDCLIP(P0, b3diff));`
			`// q3 - q0, reduced budget`
			`b3diff = rnd() % FFMAX(b3 - b3diff, 1);`
			`SET(Q3, RANDCLIP(Q0, b3diff));`

			`// same concept, budget across 4 pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(P2, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(Q2, RANDCLIP(Q0, b3diff));`

			`// extra reduced budget for weighted pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(P1, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(Q1, RANDCLIP(Q0, b3diff));`

			`buf += ystride;`
			`}`
			`}`
			`break;`
			`case 1: // weak`
			`for (j = 0; j < 2; j++) {`
			`tc25 = TC25(j) << (bit_depth - 8);`
			`tc25diff = FFMAX(tc25 - 1, 0);`
			`// 4 lines per tc`
			`for (i = 0; i < 4; i++) {`
			`// Weak filtering is signficantly simpler to activate as`
			`// we only need to satisfy d0 + d3 < beta, which`
			`// can be simplified to d0 + d0 < beta. Using the above`
			`// derivations but substiuting b3 for b1 and ensuring`
			`// that P0/Q0 are at least 1/2 tc25diff apart (tending`
			`// towards 1/2 range).`
			`b3 = (*beta << (bit_depth - 8)) >> 1;`

			`SET(P0, rnd() % (1 << bit_depth));`
			`SET(Q0, RANDCLIP(P0, tc25diff >> 1) +`
			`(tc25diff >> 1) * (P0 < (1 << (bit_depth - 1))) ? 1 : -1);`

			`// p3 - p0 up to beta3 budget`
			`b3diff = rnd() % b3;`
			`SET(P3, RANDCLIP(P0, b3diff));`
			`// q3 - q0, reduced budget`
			`b3diff = rnd() % FFMAX(b3 - b3diff, 1);`
			`SET(Q3, RANDCLIP(Q0, b3diff));`

			`// same concept, budget across 4 pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(P2, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(Q2, RANDCLIP(Q0, b3diff));`

			`// extra reduced budget for weighted pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(P1, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(Q1, RANDCLIP(Q0, b3diff));`

			`buf += ystride;`
			`}`
			`}`
			`break;`
			`case 2: // none`
			`*beta = 0; // ensure skip`
			`for (i = 0; i < 8; i++) {`
			`// we can just fill with completely random data, nothing should be touched.`
			`SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());`
			`SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());`
			`buf += ystride;`
			`}`
			`break;`
			`}`
			`}`

			`static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)`
			`{`
			`const char *type;`
			`const char *types[3] = { "strong", "weak", "skip" };`
			`int beta;`
			`int32_t tc[2] = {0};`
			`uint8_t no_p[2] = { rnd() & c, rnd() & c };`
			`uint8_t no_q[2] = { rnd() & c, rnd() & c };`
			`LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);`
			`LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);`
			`uint8_t *ptr0 = buf0 + BUF_OFFSET,`
			`*ptr1 = buf1 + BUF_OFFSET;`

tests/checkasm/hevc_*: Fix funtion pointer types Forgotten in b3bbbb14d0685c8c1fbcf8455e59c7f444290c7c. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-05-12 23:22:25 +00:00			`declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,`
			`const int32_t tc, const uint8_t no_p, const uint8_t *no_q);`
checkasm/hevc_deblock: Initialize buffer Fixes the hevc_deblock checkasm test with Valgrind. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-03-05 10:33:12 +00:00			`memset(buf0, 0, BUF_SIZE);`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00
			`for (int j = 0; j < 3; j++) {`
			`type = types[j];`
			`if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,`
			`"hevc_h_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))`
			`{`
			`randomize_luma_buffers(j, &beta, tc, buf0 + BUF_OFFSET, 16 * SIZEOF_PIXEL, SIZEOF_PIXEL, bit_depth);`
			`memcpy(buf1, buf0, BUF_SIZE);`

			`call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
			`call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`

checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,`
			`"hevc_v_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))`
			`{`
			`randomize_luma_buffers(j, &beta, tc, buf0 + BUF_OFFSET, SIZEOF_PIXEL, 16 * SIZEOF_PIXEL, bit_depth);`
			`memcpy(buf1, buf0, BUF_SIZE);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
			`call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`
			`}`
			`}`

			`void checkasm_check_hevc_deblock(void)`
			`{`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`HEVCDSPContext h;`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`int bit_depth;`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`check_deblock_chroma(&h, bit_depth, 0);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`
			`report("chroma");`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_chroma(&h, bit_depth, 1);`
			`}`
			`report("chroma_full");`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_luma(&h, bit_depth, 0);`
			`}`
			`report("luma");`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_luma(&h, bit_depth, 1);`
			`}`
			`report("luma_full");`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`