From 516c479172755c63063180b0c0953b68b670cdbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Tue, 29 Aug 2017 23:23:12 +0300
Subject: [PATCH] checkasm: Test more h264 idct variants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö
---
 tests/checkasm/h264dsp.c | 90 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index c9ddd52a7f..f355a72a74 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -22,6 +22,7 @@
 #include "checkasm.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/h264dsp.h"
+#include "libavcodec/h264data.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
@@ -223,10 +224,97 @@ static void check_idct(void)
             }
         }
     }
-    report("idct");
+}
+
+static void check_idct_multiple(void) /* Checks the IDCT entry points that take block_offset and nnzc arrays: h264_idct_add16, h264_idct_add16intra and h264_idct8_add4. */
+{
+    LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]);
+    LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]);
+    H264DSPContext h;
+    int bit_depth, i, y, func;
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]);
+
+    for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+        ff_h264dsp_init(&h, bit_depth, 1);
+        for (func = 0; func < 3; func++) { /* one pass per function pointer selected in the switch below */
+            void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL;
+            const char *name;
+            int sz = 4, intra = 0; /* sz: block edge length in pixels (8 for idct8_add4); intra: set only for add16intra */
+            int block_offset[16] = { 0 };
+            switch (func) {
+            case 0:
+                idct = h.h264_idct_add16;
+                name = "h264_idct_add16";
+                break;
+            case 1:
+                idct = h.h264_idct_add16intra;
+                name = "h264_idct_add16intra";
+                intra = 1;
+                break;
+            case 2:
+                idct = h.h264_idct8_add4;
+                name = "h264_idct8_add4";
+                sz = 8;
+                break;
+            }
+            memset(nnzc, 0, 15 * 8);
+            memset(coef_full, 0, 16 * 16 * SIZEOF_COEF);
+            for (i = 0; i < 16 * 16; i += sz * sz) { /* i advances by one sz x sz block per iteration */
+                uint8_t src[8 * 8 * 2];
+                uint8_t dst[8 * 8 * 2];
+                int16_t coef[8 * 8 * 2];
+                int index = i / sz;
+                int block_y = (index / 16) * sz; /* block_y, block_x: top-left corner of this block inside the 16x16 area */
+                int block_x = index % 16;
+                int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; /* position of that corner in dst_full, whose rows are 16 pixels wide (assuming SIZEOF_PIXEL is bytes per pixel) */
+                int nnz = rnd() % 3; /* selects one of three cases; values above 1 become sz * sz below */
+
+                randomize_buffers();
+                if (sz == 4)
+                    dct4x4(coef, bit_depth);
+                else
+                    dct8x8(coef, bit_depth);
+
+                for (y = 0; y < sz; y++) /* copy the block's sz rows from dst into its place in dst_full */
+                    memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL],
+                           &dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL);
+
+                if (nnz > 1)
+                    nnz = sz * sz;
+                memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])],
+                       coef, nnz * SIZEOF_COEF); /* copies nnz * SIZEOF_COEF bytes; the remainder of coef_full stays zero from the memset above */
+
+                if (intra && nnz == 1) /* intra variant: the one-coefficient case is stored in nnzc as 0 */
+                    nnz = 0; /* NOTE(review): presumably so add16intra has to look at the coefficients themselves - confirm against its implementation */
+
+                nnzc[scan8[i / 16]] = nnz; /* i / 16 numbers the blocks in units of 16 coefficients; scan8 is presumably declared in the newly included h264data.h */
+                block_offset[i / 16] = offset;
+            }
+
+            if (check_func(idct, "%s_%dbpp", name, bit_depth)) {
+                memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF);
+                memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF);
+                memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL);
+                call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc);
+                call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+                if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) || /* pixel output and coefficient buffers must both match */
+                    memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF))
+                    fail();
+                bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc);
+            }
+        }
+    }
 }
 
 void checkasm_check_h264dsp(void)
 {
     check_idct();
+    check_idct_multiple();
+    report("idct"); /* moved here from check_idct() so it runs after both checks */
 }