ffmpeg/tests/checkasm/hevc_deblock.c

/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>

#include "libavutil/intreadwrite.h"
#include "libavutil/macros.h"
#include "libavutil/mem_internal.h"

#include "libavcodec/hevcdsp.h"

#include "checkasm.h"

// Masks applied to each 32-bit random word written by randomize_buffers(),
// indexed by (bit_depth - 8) >> 1, i.e. one entry for 8, 10 and 12 bit.
// The 10 and 12 bit entries clear the upper bits of both 16-bit halves.
static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };

// Bytes per sample: 1 for 8 bit, 2 for 10 and 12 bit. Relies on a variable
// named bit_depth being in scope wherever the macro is expanded.
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
// Stride in bytes handed to the chroma filters (32).
#define BUF_STRIDE (16 * 2)
// Number of lines the buffer geometry below is derived from.
#define BUF_LINES (16)
// large buffer sizes based on high bit depth
// BUF_OFFSET (1024 bytes) is where the pointer handed to the filters sits
// inside each buffer; the bytes in front of it are covered by the memcmp()
// in the checks as well.
#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)
// Total size of each test buffer in bytes (3072).
#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)

// Write the same masked random 32-bit words into buf0 and buf1, four bytes
// at a time, until size bytes have been covered. The mask is picked from
// pixel_mask[] using the bit_depth variable in scope at the call site.
// The arguments are parenthesized so that expressions can be passed safely.
// NOTE(review): AV_WN32A presumably requires 4-byte aligned destinations;
// confirm against libavutil/intreadwrite.h.
#define randomize_buffers(buf0, buf1, size)                 \
    do {                                                    \
        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
        int k;                                              \
        for (k = 0; k < (size); k += 4) {                   \
            uint32_t r = rnd() & mask;                      \
            AV_WN32A((buf0) + k, r);                        \
            AV_WN32A((buf1) + k, r);                        \
        }                                                   \
    } while (0)

// Check the h and v chroma deblocking filters for one bit depth. Both
// buffers receive identical random samples, the reference and the
// implementation under test each filter their own copy, and the complete
// buffers (padding included) must be identical afterwards.
// c selects the *_c entry points (reported with a "_full" suffix) and also
// acts as the mask that allows non-zero no_p/no_q flags.
static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
{
    // see tctable[] in hevc_filter.c, we check full range
    int32_t tc[2] = { rnd() % 25, rnd() % 25 };
    // the simpler assembly (non *_c variant) functions are only ever given
    // no_p/no_q == { 0,0 }, see deblocking_filter_CTB() in hevc_filter.c
    uint8_t no_p[2] = { rnd() & c, rnd() & c };
    uint8_t no_q[2] = { rnd() & c, rnd() & c };
    LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
    // positions handed to the filters, leaving padding on both sides
    uint8_t *const ref_pix = ref_buf + BUF_OFFSET;
    uint8_t *const new_pix = new_buf + BUF_OFFSET;
    const char *const suffix = c ? "_full" : "";

    declare_func(void, uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q);

    // h variant
    if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
                   "hevc_h_loop_filter_chroma%d%s", bit_depth, suffix)) {
        randomize_buffers(ref_buf, new_buf, BUF_SIZE);

        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
        if (memcmp(ref_buf, new_buf, BUF_SIZE) != 0)
            fail();
        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
    }

    // v variant
    if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,
                   "hevc_v_loop_filter_chroma%d%s", bit_depth, suffix)) {
        randomize_buffers(ref_buf, new_buf, BUF_SIZE);

        call_ref(ref_pix, BUF_STRIDE, tc, no_p, no_q);
        call_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
        if (memcmp(ref_buf, new_buf, BUF_SIZE) != 0)
            fail();
        bench_new(new_pix, BUF_STRIDE, tc, no_p, no_q);
    }
}

// Sample accessors for the current line. They expect a byte pointer `buf`
// and a byte distance `xstride` between neighbouring samples to be in scope.
// P0..P3 lie before buf, Q0..Q3 start at buf. Each names the first byte of
// a sample; use GET()/SET() to access the whole sample.
#define P3 buf[-4 * xstride]
#define P2 buf[-3 * xstride]
#define P1 buf[-2 * xstride]
#define P0 buf[-1 * xstride]
#define Q0 buf[0 * xstride]
#define Q1 buf[1 * xstride]
#define Q2 buf[2 * xstride]
#define Q3 buf[3 * xstride]

// 2.5 * tc[x], rounded up; expects an array `tc` in scope.
#define TC25(x) ((tc[(x)] * 5 + 1) >> 1)
// Truncate x to bit_depth bits. The argument is parenthesized so that an
// expression containing operators binding looser than '&' is masked as a
// whole.
#define MASK(x) ((uint16_t)((x) & ((1 << (bit_depth)) - 1)))
// Read the sample that starts at byte x as 8 or 16 bit depending on
// SIZEOF_PIXEL.
#define GET(x) ((SIZEOF_PIXEL == 1) ? *(uint8_t*)(&(x)) : *(uint16_t*)(&(x)))
// Store y, truncated to bit_depth bits, into the sample that starts at byte x.
#define SET(x, y) do { \
    uint16_t z = MASK(y); \
    if (SIZEOF_PIXEL == 1) \
        *(uint8_t*)(&(x)) = z; \
    else \
        *(uint16_t*)(&(x)) = z; \
} while (0)
// Random value starting at GET(x) - diff (clipped to the sample range) plus
// a random offset below 2 * diff (offset 0 when diff is 0). The result may
// exceed the sample range; SET() truncates it. The expansion is wrapped in
// parentheses so it can be combined with other operators.
#define RANDCLIP(x, diff) (av_clip(GET(x) - (diff), 0, \
    (1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1))

// Fill P3..P1 and Q1..Q3 of the current line around an already written
// P0/Q0 pair. P-side samples are placed around P0 and Q-side samples around
// Q0, each within a random distance taken from the budget b3, which shrinks
// as it is spent. buf, xstride and bit_depth are consumed by the sample
// macros.
static void randomize_luma_line(uint8_t *buf, ptrdiff_t xstride,
                                int bit_depth, int b3)
{
    int b3diff;

    // p3 - p0 up to the full budget
    b3diff = rnd() % FFMAX(b3, 1);
    SET(P3, RANDCLIP(P0, b3diff));
    // q3 - q0, reduced budget
    b3diff = rnd() % FFMAX(b3 - b3diff, 1);
    SET(Q3, RANDCLIP(Q0, b3diff));

    // same concept, budget across 4 pixels
    b3diff = rnd() % FFMAX(b3, 1);
    b3    -= b3diff;
    SET(P2, RANDCLIP(P0, b3diff));
    b3diff = rnd() % FFMAX(b3, 1);
    b3    -= b3diff;
    SET(Q2, RANDCLIP(Q0, b3diff));

    // extra reduced budget for weighted pixels
    b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
    b3    -= b3diff;
    SET(P1, RANDCLIP(P0, b3diff));
    b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);
    SET(Q1, RANDCLIP(Q0, b3diff));
}

// NOTE: this function doesn't work 'correctly' in that it won't always choose
// strong/strong or weak/weak, in most cases it tends to but will sometimes mix
// weak/strong or even skip sometimes. This is more useful to test correctness
// for these functions, though it does make benching them difficult. The easiest
// way to bench these functions is to check an overall decode since there are too
// many paths and ways to trigger the deblock: we would have to bench all
// permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes
// too much.
//
// Randomizes tc[0], tc[1] and *beta, then writes 8 lines of 8 samples
// (P3..Q3) around buf.
//   type     0: aim for strong filtering, 1: aim for weak filtering,
//            2: no filtering (*beta is forced to 0); any other value only
//            randomizes tc and beta
//   beta     receives the unscaled beta value
//   tc       receives two unscaled tc values, each applying to 4 lines
//   buf      address of Q0 on the first line
//   xstride  distance in bytes between neighbouring samples of one line
//   ystride  distance in bytes between consecutive lines
static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],
   uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)
{
    int i, j, b3, tc25, tc25diff;
    // both tc & beta are unscaled inputs
    // minimum useful value is 1, full range 0-24
    tc[0] = (rnd() % 24) + 1;
    tc[1] = (rnd() % 24) + 1;
    // minimum useful value for 8bit is 8
    *beta = (rnd() % 57) + 8;

    switch (type) {
    case 0: // strong
        for (j = 0; j < 2; j++) {
            tc25 = TC25(j) << (bit_depth - 8);
            tc25diff = FFMAX(tc25 - 1, 0);
            // 4 lines per tc
            for (i = 0; i < 4; i++) {
                b3 = (*beta << (bit_depth - 8)) >> 3;

                SET(P0, rnd() % (1 << bit_depth));
                SET(Q0, RANDCLIP(P0, tc25diff));

                randomize_luma_line(buf, xstride, bit_depth, b3);

                buf += ystride;
            }
        }
        break;
    case 1: // weak
        for (j = 0; j < 2; j++) {
            tc25 = TC25(j) << (bit_depth - 8);
            tc25diff = FFMAX(tc25 - 1, 0);
            // 4 lines per tc
            for (i = 0; i < 4; i++) {
                int p0, q0, dir;

                // Weak filtering is significantly simpler to activate as
                // we only need to satisfy d0 + d3 < beta, which
                // can be simplified to d0 + d0 < beta. Using the above
                // derivations but substituting b3 for b1 and moving Q0
                // a further 1/2 tc25diff away from P0 (tending towards
                // 1/2 range).
                b3 = (*beta << (bit_depth - 8)) >> 1;

                p0 = rnd() % (1 << bit_depth);
                SET(P0, p0);
                // Shift Q0 towards the middle of the sample range. The
                // direction is selected explicitly: written inline as
                // "a + b * (cond) ? 1 : -1" the conditional would swallow
                // the whole sum and Q0 would only ever be 1 or all-ones.
                dir = p0 < (1 << (bit_depth - 1)) ? 1 : -1;
                q0  = RANDCLIP(P0, tc25diff >> 1);
                q0 += (tc25diff >> 1) * dir;
                SET(Q0, q0);

                randomize_luma_line(buf, xstride, bit_depth, b3);

                buf += ystride;
            }
        }
        break;
    case 2: // none
        *beta = 0; // ensure skip
        for (i = 0; i < 8; i++) {
            // we can just fill with completely random data, nothing should be touched.
            SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());
            SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());
            buf += ystride;
        }
        break;
    default:
        break;
    }
}

// Check the h and v luma deblocking filters for one bit depth. For each of
// the three input flavours produced by randomize_luma_buffers() the
// reference and the implementation under test filter identical copies of
// the data and the complete buffers must match afterwards.
// c selects the *_c entry points (reported with a "_full" suffix) and also
// acts as the mask that allows non-zero no_p/no_q flags.
static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
{
    const char *types[3] = { "strong", "weak", "skip" };
    int beta;
    int32_t tc[2] = {0};
    uint8_t no_p[2] = { rnd() & c, rnd() & c };
    uint8_t no_q[2] = { rnd() & c, rnd() & c };
    LOCAL_ALIGNED_32(uint8_t, ref_buf, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, new_buf, [BUF_SIZE]);
    uint8_t *ref_pix = ref_buf + BUF_OFFSET,
            *new_pix = new_buf + BUF_OFFSET;
    // distance between samples / between lines, in bytes
    const int pixel  = SIZEOF_PIXEL;
    const int stride = 16 * SIZEOF_PIXEL;
    const char *const suffix = c ? "_full" : "";

    declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q);
    // only part of the buffer is written by randomize_luma_buffers()
    memset(ref_buf, 0, BUF_SIZE);

    for (int mode = 0; mode < 3; mode++) {
        const char *const label = types[mode];

        // h variant: neighbouring samples of one line are a stride apart
        if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,
                       "hevc_h_loop_filter_luma%d_%s%s", bit_depth, label, suffix)) {
            randomize_luma_buffers(mode, &beta, tc, ref_pix, stride, pixel, bit_depth);
            memcpy(new_buf, ref_buf, BUF_SIZE);

            call_ref(ref_pix, stride, beta, tc, no_p, no_q);
            call_new(new_pix, stride, beta, tc, no_p, no_q);
            if (memcmp(ref_buf, new_buf, BUF_SIZE) != 0)
                fail();
            bench_new(new_pix, stride, beta, tc, no_p, no_q);
        }

        // v variant: neighbouring samples of one line are adjacent
        if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,
                       "hevc_v_loop_filter_luma%d_%s%s", bit_depth, label, suffix)) {
            randomize_luma_buffers(mode, &beta, tc, ref_pix, pixel, stride, bit_depth);
            memcpy(new_buf, ref_buf, BUF_SIZE);

            call_ref(ref_pix, stride, beta, tc, no_p, no_q);
            call_new(new_pix, stride, beta, tc, no_p, no_q);
            if (memcmp(ref_buf, new_buf, BUF_SIZE) != 0)
                fail();
            bench_new(new_pix, stride, beta, tc, no_p, no_q);
        }
    }
}

void checkasm_check_hevc_deblock(void)
{
    HEVCDSPContext h;
    int bit_depth;
    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
        ff_hevc_dsp_init(&h, bit_depth);
        check_deblock_chroma(&h, bit_depth, 0);
    }
    report("chroma");
    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
        ff_hevc_dsp_init(&h, bit_depth);
        check_deblock_chroma(&h, bit_depth, 1);
    }
    report("chroma_full");
    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
        ff_hevc_dsp_init(&h, bit_depth);
        check_deblock_luma(&h, bit_depth, 0);
    }
    report("luma");
    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
        ff_hevc_dsp_init(&h, bit_depth);
        check_deblock_luma(&h, bit_depth, 1);
    }
    report("luma_full");
}
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`/*`
			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License along`
			`* with FFmpeg; if not, write to the Free Software Foundation, Inc.,`
			`* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.`
			`*/`

			`#include <string.h>`

			`#include "libavutil/intreadwrite.h"`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`#include "libavutil/macros.h"`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`#include "libavutil/mem_internal.h"`

			`#include "libavcodec/hevcdsp.h"`

			`#include "checkasm.h"`

			`static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };`

			`#define SIZEOF_PIXEL ((bit_depth + 7) / 8)`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`#define BUF_STRIDE (16 * 2)`
			`#define BUF_LINES (16)`
			`// large buffer sizes based on high bit depth`
			`#define BUF_OFFSET (2 * BUF_STRIDE * BUF_LINES)`
			`#define BUF_SIZE (2 * BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2)`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
			`#define randomize_buffers(buf0, buf1, size) \`
			`do { \`
			`uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \`
			`int k; \`
			`for (k = 0; k < size; k += 4) { \`
			`uint32_t r = rnd() & mask; \`
			`AV_WN32A(buf0 + k, r); \`
			`AV_WN32A(buf1 + k, r); \`
			`} \`
			`} while (0)`

checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`{`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`// see tctable[] in hevc_filter.c, we check full range`
			`int32_t tc[2] = { rnd() % 25, rnd() % 25 };`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`// no_p, no_q can only be { 0,0 } for the simpler assembly (non *_c`
			`// variant) functions, see deblocking_filter_CTB() in hevc_filter.c`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`uint8_t no_p[2] = { rnd() & c, rnd() & c };`
			`uint8_t no_q[2] = { rnd() & c, rnd() & c };`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);`
			`LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);`

tests/checkasm/hevc_*: Avoid using declare_func_emms where possible Only the idct_dc and add_residual functions have MMX versions, so one can use the version with the stricter check (that checks that the MMX registers have not been clobbered) for all the other checks. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2023-08-31 20:23:54 +00:00			`declare_func(void, uint8_t pix, ptrdiff_t stride, int32_t tc, uint8_t no_p, uint8_t no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,`
			`"hevc_h_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))`
			`{`
			`randomize_buffers(buf0, buf1, BUF_SIZE);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
			`bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`}`

			`if (check_func(c ? h->hevc_v_loop_filter_chroma_c : h->hevc_v_loop_filter_chroma,`
			`"hevc_v_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))`
			`{`
			`randomize_buffers(buf0, buf1, BUF_SIZE);`

			`call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
			`bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q);`
			`}`
			`}`

			`#define P3 buf[-4 * xstride]`
			`#define P2 buf[-3 * xstride]`
			`#define P1 buf[-2 * xstride]`
			`#define P0 buf[-1 * xstride]`
			`#define Q0 buf[0 * xstride]`
			`#define Q1 buf[1 * xstride]`
			`#define Q2 buf[2 * xstride]`
			`#define Q3 buf[3 * xstride]`

			`#define TC25(x) ((tc[x] * 5 + 1) >> 1)`
			`#define MASK(x) (uint16_t)(x & ((1 << (bit_depth)) - 1))`
			`#define GET(x) ((SIZEOF_PIXEL == 1) ? (uint8_t)(&x) : (uint16_t)(&x))`
			`#define SET(x, y) do { \`
			`uint16_t z = MASK(y); \`
			`if (SIZEOF_PIXEL == 1) \`
			`(uint8_t)(&x) = z; \`
			`else \`
			`(uint16_t)(&x) = z; \`
			`} while (0)`
			`#define RANDCLIP(x, diff) av_clip(GET(x) - (diff), 0, \`
			`(1 << (bit_depth)) - 1) + rnd() % FFMAX(2 * (diff), 1)`

			`// NOTE: this function doesn't work 'correctly' in that it won't always choose`
			`// strong/strong or weak/weak, in most cases it tends to but will sometimes mix`
			`// weak/strong or even skip sometimes. This is more useful to test correctness`
			`// for these functions, though it does make benching them difficult. The easiest`
			`// way to bench these functions is to check an overall decode since there are too`
			`// many paths and ways to trigger the deblock: we would have to bench all`
			`// permutations of weak/strong/skip/nd_q/nd_p/no_q/no_p and it quickly becomes`
			`// too much.`
			`static void randomize_luma_buffers(int type, int *beta, int32_t tc[2],`
			`uint8_t *buf, ptrdiff_t xstride, ptrdiff_t ystride, int bit_depth)`
			`{`
			`int i, j, b3, tc25, tc25diff, b3diff;`
			`// both tc & beta are unscaled inputs`
			`// minimum useful value is 1, full range 0-24`
			`tc[0] = (rnd() % 25) + 1;`
			`tc[1] = (rnd() % 25) + 1;`
			`// minimum useful value for 8bit is 8`
			`*beta = (rnd() % 57) + 8;`

			`switch (type) {`
			`case 0: // strong`
			`for (j = 0; j < 2; j++) {`
			`tc25 = TC25(j) << (bit_depth - 8);`
			`tc25diff = FFMAX(tc25 - 1, 0);`
			`// 4 lines per tc`
			`for (i = 0; i < 4; i++) {`
			`b3 = (*beta << (bit_depth - 8)) >> 3;`

			`SET(P0, rnd() % (1 << bit_depth));`
			`SET(Q0, RANDCLIP(P0, tc25diff));`

			`// p3 - p0 up to beta3 budget`
			`b3diff = rnd() % b3;`
			`SET(P3, RANDCLIP(P0, b3diff));`
			`// q3 - q0, reduced budget`
			`b3diff = rnd() % FFMAX(b3 - b3diff, 1);`
			`SET(Q3, RANDCLIP(Q0, b3diff));`

			`// same concept, budget across 4 pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(P2, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(Q2, RANDCLIP(Q0, b3diff));`

			`// extra reduced budget for weighted pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(P1, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(Q1, RANDCLIP(Q0, b3diff));`

			`buf += ystride;`
			`}`
			`}`
			`break;`
			`case 1: // weak`
			`for (j = 0; j < 2; j++) {`
			`tc25 = TC25(j) << (bit_depth - 8);`
			`tc25diff = FFMAX(tc25 - 1, 0);`
			`// 4 lines per tc`
			`for (i = 0; i < 4; i++) {`
			`// Weak filtering is signficantly simpler to activate as`
			`// we only need to satisfy d0 + d3 < beta, which`
			`// can be simplified to d0 + d0 < beta. Using the above`
			`// derivations but substiuting b3 for b1 and ensuring`
			`// that P0/Q0 are at least 1/2 tc25diff apart (tending`
			`// towards 1/2 range).`
			`b3 = (*beta << (bit_depth - 8)) >> 1;`

			`SET(P0, rnd() % (1 << bit_depth));`
			`SET(Q0, RANDCLIP(P0, tc25diff >> 1) +`
			`(tc25diff >> 1) * (P0 < (1 << (bit_depth - 1))) ? 1 : -1);`

			`// p3 - p0 up to beta3 budget`
			`b3diff = rnd() % b3;`
			`SET(P3, RANDCLIP(P0, b3diff));`
			`// q3 - q0, reduced budget`
			`b3diff = rnd() % FFMAX(b3 - b3diff, 1);`
			`SET(Q3, RANDCLIP(Q0, b3diff));`

			`// same concept, budget across 4 pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(P2, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3, 1);`
			`SET(Q2, RANDCLIP(Q0, b3diff));`

			`// extra reduced budget for weighted pixels`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(P1, RANDCLIP(P0, b3diff));`
			`b3 -= b3diff = rnd() % FFMAX(b3 - (1 << (bit_depth - 8)), 1);`
			`SET(Q1, RANDCLIP(Q0, b3diff));`

			`buf += ystride;`
			`}`
			`}`
			`break;`
			`case 2: // none`
			`*beta = 0; // ensure skip`
			`for (i = 0; i < 8; i++) {`
			`// we can just fill with completely random data, nothing should be touched.`
			`SET(P3, rnd()); SET(P2, rnd()); SET(P1, rnd()); SET(P0, rnd());`
			`SET(Q0, rnd()); SET(Q1, rnd()); SET(Q2, rnd()); SET(Q3, rnd());`
			`buf += ystride;`
			`}`
			`break;`
			`}`
			`}`

			`static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)`
			`{`
			`const char *type;`
			`const char *types[3] = { "strong", "weak", "skip" };`
			`int beta;`
			`int32_t tc[2] = {0};`
			`uint8_t no_p[2] = { rnd() & c, rnd() & c };`
			`uint8_t no_q[2] = { rnd() & c, rnd() & c };`
			`LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);`
			`LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);`
			`uint8_t *ptr0 = buf0 + BUF_OFFSET,`
			`*ptr1 = buf1 + BUF_OFFSET;`

			`declare_func(void, uint8_t pix, ptrdiff_t stride, int beta, int32_t tc, uint8_t no_p, uint8_t no_q);`
checkasm/hevc_deblock: Initialize buffer Fixes the hevc_deblock checkasm test with Valgrind. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> 2024-03-05 10:33:12 +00:00			`memset(buf0, 0, BUF_SIZE);`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00
			`for (int j = 0; j < 3; j++) {`
			`type = types[j];`
			`if (check_func(c ? h->hevc_h_loop_filter_luma_c : h->hevc_h_loop_filter_luma,`
			`"hevc_h_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))`
			`{`
			`randomize_luma_buffers(j, &beta, tc, buf0 + BUF_OFFSET, 16 * SIZEOF_PIXEL, SIZEOF_PIXEL, bit_depth);`
			`memcpy(buf1, buf0, BUF_SIZE);`

			`call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
			`call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`

checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`if (check_func(c ? h->hevc_v_loop_filter_luma_c : h->hevc_v_loop_filter_luma,`
			`"hevc_v_loop_filter_luma%d_%s%s", bit_depth, type, c ? "_full" : ""))`
			`{`
			`randomize_luma_buffers(j, &beta, tc, buf0 + BUF_OFFSET, SIZEOF_PIXEL, 16 * SIZEOF_PIXEL, bit_depth);`
			`memcpy(buf1, buf0, BUF_SIZE);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`call_ref(ptr0, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
			`call_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`if (memcmp(buf0, buf1, BUF_SIZE))`
			`fail();`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`bench_new(ptr1, 16 * SIZEOF_PIXEL, beta, tc, no_p, no_q);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`
			`}`
			`}`

			`void checkasm_check_hevc_deblock(void)`
			`{`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`HEVCDSPContext h;`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`int bit_depth;`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`check_deblock_chroma(&h, bit_depth, 0);`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`
			`report("chroma");`
checkasm/hevc_deblock: add luma and chroma full Signed-off-by: J. Dekker <jdek@itanimul.li> 2024-01-24 11:57:04 +00:00			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_chroma(&h, bit_depth, 1);`
			`}`
			`report("chroma_full");`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_luma(&h, bit_depth, 0);`
			`}`
			`report("luma");`
			`for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {`
			`ff_hevc_dsp_init(&h, bit_depth);`
			`check_deblock_luma(&h, bit_depth, 1);`
			`}`
			`report("luma_full");`
checkasm: add hevc_deblock chroma test Signed-off-by: J. Dekker <jdek@itanimul.li> 2023-03-21 17:44:03 +00:00			`}`